/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/RegAllocGreedy.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file defines the RAGreedy function pass for register allocation in |
11 | | // optimized builds. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AllocationOrder.h" |
16 | | #include "InterferenceCache.h" |
17 | | #include "LiveDebugVariables.h" |
18 | | #include "RegAllocBase.h" |
19 | | #include "SpillPlacement.h" |
20 | | #include "Spiller.h" |
21 | | #include "SplitKit.h" |
22 | | #include "llvm/ADT/ArrayRef.h" |
23 | | #include "llvm/ADT/BitVector.h" |
24 | | #include "llvm/ADT/DenseMap.h" |
25 | | #include "llvm/ADT/IndexedMap.h" |
26 | | #include "llvm/ADT/SetVector.h" |
27 | | #include "llvm/ADT/SmallPtrSet.h" |
28 | | #include "llvm/ADT/SmallSet.h" |
29 | | #include "llvm/ADT/SmallVector.h" |
30 | | #include "llvm/ADT/Statistic.h" |
31 | | #include "llvm/ADT/StringRef.h" |
32 | | #include "llvm/Analysis/AliasAnalysis.h" |
33 | | #include "llvm/Analysis/OptimizationDiagnosticInfo.h" |
34 | | #include "llvm/CodeGen/CalcSpillWeights.h" |
35 | | #include "llvm/CodeGen/EdgeBundles.h" |
36 | | #include "llvm/CodeGen/LiveInterval.h" |
37 | | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
38 | | #include "llvm/CodeGen/LiveIntervalUnion.h" |
39 | | #include "llvm/CodeGen/LiveRangeEdit.h" |
40 | | #include "llvm/CodeGen/LiveRegMatrix.h" |
41 | | #include "llvm/CodeGen/LiveStackAnalysis.h" |
42 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
43 | | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
44 | | #include "llvm/CodeGen/MachineDominators.h" |
45 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
46 | | #include "llvm/CodeGen/MachineFunction.h" |
47 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
48 | | #include "llvm/CodeGen/MachineInstr.h" |
49 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
50 | | #include "llvm/CodeGen/MachineOperand.h" |
51 | | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
52 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
53 | | #include "llvm/CodeGen/RegAllocRegistry.h" |
54 | | #include "llvm/CodeGen/RegisterClassInfo.h" |
55 | | #include "llvm/CodeGen/SlotIndexes.h" |
56 | | #include "llvm/CodeGen/VirtRegMap.h" |
57 | | #include "llvm/IR/Function.h" |
58 | | #include "llvm/IR/LLVMContext.h" |
59 | | #include "llvm/MC/MCRegisterInfo.h" |
60 | | #include "llvm/Pass.h" |
61 | | #include "llvm/Support/BlockFrequency.h" |
62 | | #include "llvm/Support/BranchProbability.h" |
63 | | #include "llvm/Support/CommandLine.h" |
64 | | #include "llvm/Support/Debug.h" |
65 | | #include "llvm/Support/MathExtras.h" |
66 | | #include "llvm/Support/Timer.h" |
67 | | #include "llvm/Support/raw_ostream.h" |
68 | | #include "llvm/Target/TargetInstrInfo.h" |
69 | | #include "llvm/Target/TargetMachine.h" |
70 | | #include "llvm/Target/TargetRegisterInfo.h" |
71 | | #include "llvm/Target/TargetSubtargetInfo.h" |
72 | | #include <algorithm> |
73 | | #include <cassert> |
74 | | #include <cstdint> |
75 | | #include <memory> |
76 | | #include <queue> |
77 | | #include <tuple> |
78 | | #include <utility> |
79 | | |
80 | | using namespace llvm; |
81 | | |
82 | 31.8k | #define DEBUG_TYPE "regalloc" |
83 | | |
84 | | STATISTIC(NumGlobalSplits, "Number of split global live ranges"); |
85 | | STATISTIC(NumLocalSplits, "Number of split local live ranges"); |
86 | | STATISTIC(NumEvicted, "Number of interferences evicted"); |
87 | | |
88 | | static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( |
89 | | "split-spill-mode", cl::Hidden, |
90 | | cl::desc("Spill mode for splitting live ranges"), |
91 | | cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), |
92 | | clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), |
93 | | clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), |
94 | | cl::init(SplitEditor::SM_Speed)); |
95 | | |
96 | | static cl::opt<unsigned> |
97 | | LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, |
98 | | cl::desc("Last chance recoloring max depth"), |
99 | | cl::init(5)); |
100 | | |
101 | | static cl::opt<unsigned> LastChanceRecoloringMaxInterference( |
102 | | "lcr-max-interf", cl::Hidden, |
103 | | cl::desc("Last chance recoloring maximum number of considered" |
104 | | " interference at a time"), |
105 | | cl::init(8)); |
106 | | |
107 | | static cl::opt<bool> |
108 | | ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, |
109 | | cl::desc("Exhaustive Search for registers bypassing the depth " |
110 | | "and interference cutoffs of last chance recoloring")); |
111 | | |
112 | | static cl::opt<bool> EnableLocalReassignment( |
113 | | "enable-local-reassign", cl::Hidden, |
114 | | cl::desc("Local reassignment can yield better allocation decisions, but " |
115 | | "may be compile time intensive"), |
116 | | cl::init(false)); |
117 | | |
118 | | static cl::opt<bool> EnableDeferredSpilling( |
119 | | "enable-deferred-spilling", cl::Hidden, |
120 | | cl::desc("Instead of spilling a variable right away, defer the actual " |
121 | | "code insertion to the end of the allocation. That way the " |
122 | | "allocator might still find a suitable coloring for this " |
123 | | "variable because of other evicted variables."), |
124 | | cl::init(false)); |
125 | | |
126 | | // FIXME: Find a good default for this flag and remove the flag. |
127 | | static cl::opt<unsigned> |
128 | | CSRFirstTimeCost("regalloc-csr-first-time-cost", |
129 | | cl::desc("Cost for first time use of callee-saved register."), |
130 | | cl::init(0), cl::Hidden); |
131 | | |
132 | | static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", |
133 | | createGreedyRegisterAllocator); |
134 | | |
135 | | namespace { |
136 | | |
137 | | class RAGreedy : public MachineFunctionPass, |
138 | | public RegAllocBase, |
139 | | private LiveRangeEdit::Delegate { |
140 | | // Convenient shortcuts. |
141 | | using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; |
142 | | using SmallLISet = SmallPtrSet<LiveInterval *, 4>; |
143 | | using SmallVirtRegSet = SmallSet<unsigned, 16>; |
144 | | |
145 | | // context |
146 | | MachineFunction *MF; |
147 | | |
148 | | // Shortcuts to some useful interface. |
149 | | const TargetInstrInfo *TII; |
150 | | const TargetRegisterInfo *TRI; |
151 | | RegisterClassInfo RCI; |
152 | | |
153 | | // analyses |
154 | | SlotIndexes *Indexes; |
155 | | MachineBlockFrequencyInfo *MBFI; |
156 | | MachineDominatorTree *DomTree; |
157 | | MachineLoopInfo *Loops; |
158 | | MachineOptimizationRemarkEmitter *ORE; |
159 | | EdgeBundles *Bundles; |
160 | | SpillPlacement *SpillPlacer; |
161 | | LiveDebugVariables *DebugVars; |
162 | | AliasAnalysis *AA; |
163 | | |
164 | | // state |
165 | | std::unique_ptr<Spiller> SpillerInstance; |
166 | | PQueue Queue; |
167 | | unsigned NextCascade; |
168 | | |
169 | | // Live ranges pass through a number of stages as we try to allocate them. |
170 | | // Some of the stages may also create new live ranges: |
171 | | // |
172 | | // - Region splitting. |
173 | | // - Per-block splitting. |
174 | | // - Local splitting. |
175 | | // - Spilling. |
176 | | // |
177 | | // Ranges produced by one of the stages skip the previous stages when they are |
178 | | // dequeued. This improves performance because we can skip interference checks |
179 | | // that are unlikely to give any results. It also guarantees that the live |
180 | | // range splitting algorithm terminates, something that is otherwise hard to |
181 | | // ensure. |
182 | | enum LiveRangeStage { |
183 | | /// Newly created live range that has never been queued. |
184 | | RS_New, |
185 | | |
186 | | /// Only attempt assignment and eviction. Then requeue as RS_Split. |
187 | | RS_Assign, |
188 | | |
189 | | /// Attempt live range splitting if assignment is impossible. |
190 | | RS_Split, |
191 | | |
192 | | /// Attempt more aggressive live range splitting that is guaranteed to make |
193 | | /// progress. This is used for split products that may not be making |
194 | | /// progress. |
195 | | RS_Split2, |
196 | | |
197 | | /// Live range will be spilled. No more splitting will be attempted. |
198 | | RS_Spill, |
199 | | |
200 | | |
201 | | /// Live range is in memory. Because of other evictions, it might get moved |
202 | | /// in a register in the end. |
203 | | RS_Memory, |
204 | | |
205 | | /// There is nothing more we can do to this live range. Abort compilation |
206 | | /// if it can't be assigned. |
207 | | RS_Done |
208 | | }; |
209 | | |
210 | | // Enum CutOffStage to keep a track whether the register allocation failed |
211 | | // because of the cutoffs encountered in last chance recoloring. |
212 | | // Note: This is used as bitmask. New value should be next power of 2. |
213 | | enum CutOffStage { |
214 | | // No cutoffs encountered |
215 | | CO_None = 0, |
216 | | |
217 | | // lcr-max-depth cutoff encountered |
218 | | CO_Depth = 1, |
219 | | |
220 | | // lcr-max-interf cutoff encountered |
221 | | CO_Interf = 2 |
222 | | }; |
223 | | |
224 | | uint8_t CutOffInfo; |
225 | | |
226 | | #ifndef NDEBUG |
227 | | static const char *const StageName[]; |
228 | | #endif |
229 | | |
230 | | // RegInfo - Keep additional information about each live range. |
231 | | struct RegInfo { |
232 | | LiveRangeStage Stage = RS_New; |
233 | | |
234 | | // Cascade - Eviction loop prevention. See canEvictInterference(). |
235 | | unsigned Cascade = 0; |
236 | | |
237 | 31.8k | RegInfo() = default; |
238 | | }; |
239 | | |
240 | | IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; |
241 | | |
242 | 31.4M | LiveRangeStage getStage(const LiveInterval &VirtReg) const { |
243 | 31.4M | return ExtraRegInfo[VirtReg.reg].Stage; |
244 | 31.4M | } |
245 | | |
246 | 466k | void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { |
247 | 466k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
248 | 466k | ExtraRegInfo[VirtReg.reg].Stage = Stage; |
249 | 466k | } |
250 | | |
251 | | template<typename Iterator> |
252 | 281k | void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { |
253 | 281k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
254 | 587k | for (;Begin != End587k ; ++Begin306k ) { |
255 | 306k | unsigned Reg = *Begin; |
256 | 306k | if (ExtraRegInfo[Reg].Stage == RS_New) |
257 | 305k | ExtraRegInfo[Reg].Stage = NewStage; |
258 | 306k | } |
259 | 281k | } RegAllocGreedy.cpp:void (anonymous namespace)::RAGreedy::setStage<unsigned int const*>(unsigned int const*, unsigned int const*, (anonymous namespace)::RAGreedy::LiveRangeStage) Line | Count | Source | 252 | 40 | void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { | 253 | 40 | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | 254 | 126 | for (;Begin != End126 ; ++Begin86 ) { | 255 | 86 | unsigned Reg = *Begin; | 256 | 86 | if (ExtraRegInfo[Reg].Stage == RS_New) | 257 | 86 | ExtraRegInfo[Reg].Stage = NewStage; | 258 | 86 | } | 259 | 40 | } |
RegAllocGreedy.cpp:void (anonymous namespace)::RAGreedy::setStage<unsigned int*>(unsigned int*, unsigned int*, (anonymous namespace)::RAGreedy::LiveRangeStage) Line | Count | Source | 252 | 281k | void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { | 253 | 281k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); | 254 | 587k | for (;Begin != End587k ; ++Begin306k ) { | 255 | 306k | unsigned Reg = *Begin; | 256 | 306k | if (ExtraRegInfo[Reg].Stage == RS_New) | 257 | 305k | ExtraRegInfo[Reg].Stage = NewStage; | 258 | 306k | } | 259 | 281k | } |
|
260 | | |
261 | | /// Cost of evicting interference. |
262 | | struct EvictionCost { |
263 | | unsigned BrokenHints = 0; ///< Total number of broken hints. |
264 | | float MaxWeight = 0; ///< Maximum spill weight evicted. |
265 | | |
266 | 20.2M | EvictionCost() = default; |
267 | | |
268 | 5.71M | bool isMax() const { return BrokenHints == ~0u; } |
269 | | |
270 | 1.00M | void setMax() { BrokenHints = ~0u; } |
271 | | |
272 | 1.34M | void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } |
273 | | |
274 | 13.4M | bool operator<(const EvictionCost &O) const { |
275 | 13.4M | return std::tie(BrokenHints, MaxWeight) < |
276 | 13.4M | std::tie(O.BrokenHints, O.MaxWeight); |
277 | 13.4M | } |
278 | | }; |
279 | | |
280 | | // splitting state. |
281 | | std::unique_ptr<SplitAnalysis> SA; |
282 | | std::unique_ptr<SplitEditor> SE; |
283 | | |
284 | | /// Cached per-block interference maps |
285 | | InterferenceCache IntfCache; |
286 | | |
287 | | /// All basic blocks where the current register has uses. |
288 | | SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; |
289 | | |
290 | | /// Global live range splitting candidate info. |
291 | | struct GlobalSplitCandidate { |
292 | | // Register intended for assignment, or 0. |
293 | | unsigned PhysReg; |
294 | | |
295 | | // SplitKit interval index for this candidate. |
296 | | unsigned IntvIdx; |
297 | | |
298 | | // Interference for PhysReg. |
299 | | InterferenceCache::Cursor Intf; |
300 | | |
301 | | // Bundles where this candidate should be live. |
302 | | BitVector LiveBundles; |
303 | | SmallVector<unsigned, 8> ActiveBlocks; |
304 | | |
305 | 9.40M | void reset(InterferenceCache &Cache, unsigned Reg) { |
306 | 9.40M | PhysReg = Reg; |
307 | 9.40M | IntvIdx = 0; |
308 | 9.40M | Intf.setPhysReg(Cache, Reg); |
309 | 9.40M | LiveBundles.clear(); |
310 | 9.40M | ActiveBlocks.clear(); |
311 | 9.40M | } |
312 | | |
313 | | // Set B[i] = C for every live bundle where B[i] was NoCand. |
314 | 134k | unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { |
315 | 134k | unsigned Count = 0; |
316 | 134k | for (unsigned i : LiveBundles.set_bits()) |
317 | 1.16M | if (1.16M B[i] == NoCand1.16M ) { |
318 | 1.09M | B[i] = C; |
319 | 1.09M | Count++; |
320 | 1.09M | } |
321 | 134k | return Count; |
322 | 134k | } |
323 | | }; |
324 | | |
325 | | /// Candidate info for each PhysReg in AllocationOrder. |
326 | | /// This vector never shrinks, but grows to the size of the largest register |
327 | | /// class. |
328 | | SmallVector<GlobalSplitCandidate, 32> GlobalCand; |
329 | | |
330 | | enum : unsigned { NoCand = ~0u }; |
331 | | |
332 | | /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to |
333 | | /// NoCand which indicates the stack interval. |
334 | | SmallVector<unsigned, 32> BundleCand; |
335 | | |
336 | | /// Callee-save register cost, calculated once per machine function. |
337 | | BlockFrequency CSRCost; |
338 | | |
339 | | /// Run or not the local reassignment heuristic. This information is |
340 | | /// obtained from the TargetSubtargetInfo. |
341 | | bool EnableLocalReassign; |
342 | | |
343 | | /// Set of broken hints that may be reconciled later because of eviction. |
344 | | SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; |
345 | | |
346 | | public: |
347 | | RAGreedy(); |
348 | | |
349 | | /// Return the pass name. |
350 | 31.7k | StringRef getPassName() const override { return "Greedy Register Allocator"; } |
351 | | |
352 | | /// RAGreedy analysis usage. |
353 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
354 | | void releaseMemory() override; |
355 | 868k | Spiller &spiller() override { return *SpillerInstance; } |
356 | | void enqueue(LiveInterval *LI) override; |
357 | | LiveInterval *dequeue() override; |
358 | | unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; |
359 | | void aboutToRemoveInterval(LiveInterval &) override; |
360 | | |
361 | | /// Perform register allocation. |
362 | | bool runOnMachineFunction(MachineFunction &mf) override; |
363 | | |
364 | 31.7k | MachineFunctionProperties getRequiredProperties() const override { |
365 | 31.7k | return MachineFunctionProperties().set( |
366 | 31.7k | MachineFunctionProperties::Property::NoPHIs); |
367 | 31.7k | } |
368 | | |
369 | | static char ID; |
370 | | |
371 | | private: |
372 | | unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, |
373 | | SmallVirtRegSet &, unsigned = 0); |
374 | | |
375 | | bool LRE_CanEraseVirtReg(unsigned) override; |
376 | | void LRE_WillShrinkVirtReg(unsigned) override; |
377 | | void LRE_DidCloneVirtReg(unsigned, unsigned) override; |
378 | | void enqueue(PQueue &CurQueue, LiveInterval *LI); |
379 | | LiveInterval *dequeue(PQueue &CurQueue); |
380 | | |
381 | | BlockFrequency calcSpillCost(); |
382 | | bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); |
383 | | void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); |
384 | | void growRegion(GlobalSplitCandidate &Cand); |
385 | | BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); |
386 | | bool calcCompactRegion(GlobalSplitCandidate&); |
387 | | void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); |
388 | | void calcGapWeights(unsigned, SmallVectorImpl<float>&); |
389 | | unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); |
390 | | bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); |
391 | | bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); |
392 | | void evictInterference(LiveInterval&, unsigned, |
393 | | SmallVectorImpl<unsigned>&); |
394 | | bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, |
395 | | SmallLISet &RecoloringCandidates, |
396 | | const SmallVirtRegSet &FixedRegisters); |
397 | | |
398 | | unsigned tryAssign(LiveInterval&, AllocationOrder&, |
399 | | SmallVectorImpl<unsigned>&); |
400 | | unsigned tryEvict(LiveInterval&, AllocationOrder&, |
401 | | SmallVectorImpl<unsigned>&, unsigned = ~0u); |
402 | | unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, |
403 | | SmallVectorImpl<unsigned>&); |
404 | | /// Calculate cost of region splitting. |
405 | | unsigned calculateRegionSplitCost(LiveInterval &VirtReg, |
406 | | AllocationOrder &Order, |
407 | | BlockFrequency &BestCost, |
408 | | unsigned &NumCands, bool IgnoreCSR); |
409 | | /// Perform region splitting. |
410 | | unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, |
411 | | bool HasCompact, |
412 | | SmallVectorImpl<unsigned> &NewVRegs); |
413 | | /// Check other options before using a callee-saved register for the first |
414 | | /// time. |
415 | | unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, |
416 | | unsigned PhysReg, unsigned &CostPerUseLimit, |
417 | | SmallVectorImpl<unsigned> &NewVRegs); |
418 | | void initializeCSRCost(); |
419 | | unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, |
420 | | SmallVectorImpl<unsigned>&); |
421 | | unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, |
422 | | SmallVectorImpl<unsigned>&); |
423 | | unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, |
424 | | SmallVectorImpl<unsigned>&); |
425 | | unsigned trySplit(LiveInterval&, AllocationOrder&, |
426 | | SmallVectorImpl<unsigned>&); |
427 | | unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, |
428 | | SmallVectorImpl<unsigned> &, |
429 | | SmallVirtRegSet &, unsigned); |
430 | | bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, |
431 | | SmallVirtRegSet &, unsigned); |
432 | | void tryHintRecoloring(LiveInterval &); |
433 | | void tryHintsRecoloring(); |
434 | | |
435 | | /// Model the information carried by one end of a copy. |
436 | | struct HintInfo { |
437 | | /// The frequency of the copy. |
438 | | BlockFrequency Freq; |
439 | | /// The virtual register or physical register. |
440 | | unsigned Reg; |
441 | | /// Its currently assigned register. |
442 | | /// In case of a physical register Reg == PhysReg. |
443 | | unsigned PhysReg; |
444 | | |
445 | | HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) |
446 | 3.38M | : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} |
447 | | }; |
448 | | using HintsInfo = SmallVector<HintInfo, 4>; |
449 | | |
450 | | BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); |
451 | | void collectHintInfo(unsigned, HintsInfo &); |
452 | | |
453 | | bool isUnusedCalleeSavedReg(unsigned PhysReg) const; |
454 | | |
455 | | /// Compute and report the number of spills and reloads for a loop. |
456 | | void reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, |
457 | | unsigned &FoldedReloads, unsigned &Spills, |
458 | | unsigned &FoldedSpills); |
459 | | |
460 | | /// Report the number of spills and reloads for each loop. |
461 | 587k | void reportNumberOfSplillsReloads() { |
462 | 256k | for (MachineLoop *L : *Loops) { |
463 | 256k | unsigned Reloads, FoldedReloads, Spills, FoldedSpills; |
464 | 256k | reportNumberOfSplillsReloads(L, Reloads, FoldedReloads, Spills, |
465 | 256k | FoldedSpills); |
466 | 256k | } |
467 | 587k | } |
468 | | }; |
469 | | |
470 | | } // end anonymous namespace |
471 | | |
472 | | char RAGreedy::ID = 0; |
473 | | char &llvm::RAGreedyID = RAGreedy::ID; |
474 | | |
475 | 36.7k | INITIALIZE_PASS_BEGIN36.7k (RAGreedy, "greedy",
|
476 | 36.7k | "Greedy Register Allocator", false, false) |
477 | 36.7k | INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) |
478 | 36.7k | INITIALIZE_PASS_DEPENDENCY(SlotIndexes) |
479 | 36.7k | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) |
480 | 36.7k | INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) |
481 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineScheduler) |
482 | 36.7k | INITIALIZE_PASS_DEPENDENCY(LiveStacks) |
483 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
484 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) |
485 | 36.7k | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) |
486 | 36.7k | INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) |
487 | 36.7k | INITIALIZE_PASS_DEPENDENCY(EdgeBundles) |
488 | 36.7k | INITIALIZE_PASS_DEPENDENCY(SpillPlacement) |
489 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) |
490 | 36.7k | INITIALIZE_PASS_END(RAGreedy, "greedy", |
491 | | "Greedy Register Allocator", false, false) |
492 | | |
493 | | #ifndef NDEBUG |
494 | | const char *const RAGreedy::StageName[] = { |
495 | | "RS_New", |
496 | | "RS_Assign", |
497 | | "RS_Split", |
498 | | "RS_Split2", |
499 | | "RS_Spill", |
500 | | "RS_Memory", |
501 | | "RS_Done" |
502 | | }; |
503 | | #endif |
504 | | |
505 | | // Hysteresis to use when comparing floats. |
506 | | // This helps stabilize decisions based on float comparisons. |
507 | | const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 |
508 | | |
509 | 31.8k | FunctionPass* llvm::createGreedyRegisterAllocator() { |
510 | 31.8k | return new RAGreedy(); |
511 | 31.8k | } |
512 | | |
513 | 31.8k | RAGreedy::RAGreedy(): MachineFunctionPass(ID) { |
514 | 31.8k | } |
515 | | |
516 | 31.7k | void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { |
517 | 31.7k | AU.setPreservesCFG(); |
518 | 31.7k | AU.addRequired<MachineBlockFrequencyInfo>(); |
519 | 31.7k | AU.addPreserved<MachineBlockFrequencyInfo>(); |
520 | 31.7k | AU.addRequired<AAResultsWrapperPass>(); |
521 | 31.7k | AU.addPreserved<AAResultsWrapperPass>(); |
522 | 31.7k | AU.addRequired<LiveIntervals>(); |
523 | 31.7k | AU.addPreserved<LiveIntervals>(); |
524 | 31.7k | AU.addRequired<SlotIndexes>(); |
525 | 31.7k | AU.addPreserved<SlotIndexes>(); |
526 | 31.7k | AU.addRequired<LiveDebugVariables>(); |
527 | 31.7k | AU.addPreserved<LiveDebugVariables>(); |
528 | 31.7k | AU.addRequired<LiveStacks>(); |
529 | 31.7k | AU.addPreserved<LiveStacks>(); |
530 | 31.7k | AU.addRequired<MachineDominatorTree>(); |
531 | 31.7k | AU.addPreserved<MachineDominatorTree>(); |
532 | 31.7k | AU.addRequired<MachineLoopInfo>(); |
533 | 31.7k | AU.addPreserved<MachineLoopInfo>(); |
534 | 31.7k | AU.addRequired<VirtRegMap>(); |
535 | 31.7k | AU.addPreserved<VirtRegMap>(); |
536 | 31.7k | AU.addRequired<LiveRegMatrix>(); |
537 | 31.7k | AU.addPreserved<LiveRegMatrix>(); |
538 | 31.7k | AU.addRequired<EdgeBundles>(); |
539 | 31.7k | AU.addRequired<SpillPlacement>(); |
540 | 31.7k | AU.addRequired<MachineOptimizationRemarkEmitterPass>(); |
541 | 31.7k | MachineFunctionPass::getAnalysisUsage(AU); |
542 | 31.7k | } |
543 | | |
544 | | //===----------------------------------------------------------------------===// |
545 | | // LiveRangeEdit delegate methods |
546 | | //===----------------------------------------------------------------------===// |
547 | | |
548 | 452k | bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { |
549 | 452k | LiveInterval &LI = LIS->getInterval(VirtReg); |
550 | 452k | if (VRM->hasPhys(VirtReg)452k ) { |
551 | 5.88k | Matrix->unassign(LI); |
552 | 5.88k | aboutToRemoveInterval(LI); |
553 | 5.88k | return true; |
554 | 5.88k | } |
555 | 446k | // Unassigned virtreg is probably in the priority queue. |
556 | 446k | // RegAllocBase will erase it after dequeueing. |
557 | 446k | // Nonetheless, clear the live-range so that the debug |
558 | 446k | // dump will show the right state for that VirtReg. |
559 | 446k | LI.clear(); |
560 | 446k | return false; |
561 | 446k | } |
562 | | |
563 | 213k | void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { |
564 | 213k | if (!VRM->hasPhys(VirtReg)) |
565 | 191k | return; |
566 | 21.5k | |
567 | 21.5k | // Register is assigned, put it back on the queue for reassignment. |
568 | 21.5k | LiveInterval &LI = LIS->getInterval(VirtReg); |
569 | 21.5k | Matrix->unassign(LI); |
570 | 21.5k | enqueue(&LI); |
571 | 21.5k | } |
572 | | |
573 | 642 | void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { |
574 | 642 | // Cloning a register we haven't even heard about yet? Just ignore it. |
575 | 642 | if (!ExtraRegInfo.inBounds(Old)) |
576 | 3 | return; |
577 | 639 | |
578 | 639 | // LRE may clone a virtual register because dead code elimination causes it to |
579 | 639 | // be split into connected components. The new components are much smaller |
580 | 639 | // than the original, so they should get a new chance at being assigned. |
581 | 639 | // same stage as the parent. |
582 | 639 | ExtraRegInfo[Old].Stage = RS_Assign; |
583 | 639 | ExtraRegInfo.grow(New); |
584 | 639 | ExtraRegInfo[New] = ExtraRegInfo[Old]; |
585 | 639 | } |
586 | | |
587 | 1.17M | void RAGreedy::releaseMemory() { |
588 | 1.17M | SpillerInstance.reset(); |
589 | 1.17M | ExtraRegInfo.clear(); |
590 | 1.17M | GlobalCand.clear(); |
591 | 1.17M | } |
592 | | |
593 | 11.1M | void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } |
594 | | |
595 | 11.1M | void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { |
596 | 11.1M | // Prioritize live ranges by size, assigning larger ranges first. |
597 | 11.1M | // The queue holds (size, reg) pairs. |
598 | 11.1M | const unsigned Size = LI->getSize(); |
599 | 11.1M | const unsigned Reg = LI->reg; |
600 | 11.1M | assert(TargetRegisterInfo::isVirtualRegister(Reg) && |
601 | 11.1M | "Can only enqueue virtual registers"); |
602 | 11.1M | unsigned Prio; |
603 | 11.1M | |
604 | 11.1M | ExtraRegInfo.grow(Reg); |
605 | 11.1M | if (ExtraRegInfo[Reg].Stage == RS_New) |
606 | 9.84M | ExtraRegInfo[Reg].Stage = RS_Assign; |
607 | 11.1M | |
608 | 11.1M | if (ExtraRegInfo[Reg].Stage == RS_Split11.1M ) { |
609 | 314k | // Unsplit ranges that couldn't be allocated immediately are deferred until |
610 | 314k | // everything else has been allocated. |
611 | 314k | Prio = Size; |
612 | 11.1M | } else if (10.8M ExtraRegInfo[Reg].Stage == RS_Memory10.8M ) { |
613 | 0 | // Memory operand should be considered last. |
614 | 0 | // Change the priority such that Memory operand are assigned in |
615 | 0 | // the reverse order that they came in. |
616 | 0 | // TODO: Make this a member variable and probably do something about hints. |
617 | 0 | static unsigned MemOp = 0; |
618 | 0 | Prio = MemOp++; |
619 | 10.8M | } else { |
620 | 10.8M | // Giant live ranges fall back to the global assignment heuristic, which |
621 | 10.8M | // prevents excessive spilling in pathological cases. |
622 | 10.8M | bool ReverseLocal = TRI->reverseLocalAssignment(); |
623 | 10.8M | const TargetRegisterClass &RC = *MRI->getRegClass(Reg); |
624 | 10.8M | bool ForceGlobal = !ReverseLocal && |
625 | 10.8M | (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); |
626 | 10.8M | |
627 | 10.8M | if (ExtraRegInfo[Reg].Stage == RS_Assign && 10.8M !ForceGlobal10.4M && !LI->empty()8.77M && |
628 | 10.8M | LIS->intervalIsInOneMBB(*LI)8.77M ) { |
629 | 7.20M | // Allocate original local ranges in linear instruction order. Since they |
630 | 7.20M | // are singly defined, this produces optimal coloring in the absence of |
631 | 7.20M | // global interference and other constraints. |
632 | 7.20M | if (!ReverseLocal) |
633 | 7.20M | Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); |
634 | 0 | else { |
635 | 0 | // Allocating bottom up may allow many short LRGs to be assigned first |
636 | 0 | // to one of the cheap registers. This could be much faster for very |
637 | 0 | // large blocks on targets with many physical registers. |
638 | 0 | Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); |
639 | 0 | } |
640 | 7.20M | Prio |= RC.AllocationPriority << 24; |
641 | 10.8M | } else { |
642 | 3.67M | // Allocate global and split ranges in long->short order. Long ranges that |
643 | 3.67M | // don't fit should be spilled (or split) ASAP so they don't create |
644 | 3.67M | // interference. Mark a bit to prioritize global above local ranges. |
645 | 3.67M | Prio = (1u << 29) + Size; |
646 | 3.67M | } |
647 | 10.8M | // Mark a higher bit to prioritize global and local above RS_Split. |
648 | 10.8M | Prio |= (1u << 31); |
649 | 10.8M | |
650 | 10.8M | // Boost ranges that have a physical register hint. |
651 | 10.8M | if (VRM->hasKnownPreference(Reg)) |
652 | 4.48M | Prio |= (1u << 30); |
653 | 10.8M | } |
654 | 11.1M | // The virtual register number is a tie breaker for same-sized ranges. |
655 | 11.1M | // Give lower vreg numbers higher priority to assign them first. |
656 | 11.1M | CurQueue.push(std::make_pair(Prio, ~Reg)); |
657 | 11.1M | } |
658 | | |
659 | 11.7M | LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } |
660 | | |
661 | 11.7M | LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { |
662 | 11.7M | if (CurQueue.empty()) |
663 | 587k | return nullptr; |
664 | 11.1M | LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); |
665 | 11.1M | CurQueue.pop(); |
666 | 11.1M | return LI; |
667 | 11.1M | } |
668 | | |
669 | | //===----------------------------------------------------------------------===// |
670 | | // Direct Assignment |
671 | | //===----------------------------------------------------------------------===// |
672 | | |
673 | | /// tryAssign - Try to assign VirtReg to an available register. |
674 | | unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, |
675 | | AllocationOrder &Order, |
676 | 11.1M | SmallVectorImpl<unsigned> &NewVRegs) { |
677 | 11.1M | Order.rewind(); |
678 | 11.1M | unsigned PhysReg; |
679 | 95.9M | while ((PhysReg = Order.next())) |
680 | 94.7M | if (94.7M !Matrix->checkInterference(VirtReg, PhysReg)94.7M ) |
681 | 9.92M | break; |
682 | 11.1M | if (!PhysReg || 11.1M Order.isHint()9.92M ) |
683 | 4.18M | return PhysReg; |
684 | 7.00M | |
685 | 7.00M | // PhysReg is available, but there may be a better choice. |
686 | 7.00M | |
687 | 7.00M | // If we missed a simple hint, try to cheaply evict interference from the |
688 | 7.00M | // preferred register. |
689 | 7.00M | if (unsigned 7.00M Hint7.00M = MRI->getSimpleHint(VirtReg.reg)) |
690 | 1.71M | if (1.71M Order.isHint(Hint)1.71M ) { |
691 | 1.34M | DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); |
692 | 1.34M | EvictionCost MaxCost; |
693 | 1.34M | MaxCost.setBrokenHints(1); |
694 | 1.34M | if (canEvictInterference(VirtReg, Hint, true, MaxCost)1.34M ) { |
695 | 1.74k | evictInterference(VirtReg, Hint, NewVRegs); |
696 | 1.74k | return Hint; |
697 | 1.74k | } |
698 | 1.34M | // Record the missed hint, we may be able to recover |
699 | 1.34M | // at the end if the surrounding allocation changed. |
700 | 1.34M | SetOfBrokenHints.insert(&VirtReg); |
701 | 1.34M | } |
702 | 7.00M | |
703 | 7.00M | // Try to evict interference from a cheaper alternative. |
704 | 7.00M | unsigned Cost = TRI->getCostPerUse(PhysReg); |
705 | 7.00M | |
706 | 7.00M | // Most registers have 0 additional cost. |
707 | 7.00M | if (!Cost) |
708 | 6.95M | return PhysReg; |
709 | 43.3k | |
710 | 43.3k | DEBUG43.3k (dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost |
711 | 43.3k | << '\n'); |
712 | 43.3k | unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); |
713 | 43.3k | return CheapReg ? CheapReg19.2k : PhysReg24.1k ; |
714 | 11.1M | } |
715 | | |
716 | | //===----------------------------------------------------------------------===// |
717 | | // Interference eviction |
718 | | //===----------------------------------------------------------------------===// |
719 | | |
720 | 4.78M | unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { |
721 | 4.78M | AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); |
722 | 4.78M | unsigned PhysReg; |
723 | 163M | while ((PhysReg = Order.next())163M ) { |
724 | 158M | if (PhysReg == PrevReg) |
725 | 4.52M | continue; |
726 | 154M | |
727 | 154M | MCRegUnitIterator Units(PhysReg, TRI); |
728 | 154M | for (; Units.isValid()154M ; ++Units136k ) { |
729 | 154M | // Instantiate a "subquery", not to be confused with the Queries array. |
730 | 154M | LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]); |
731 | 154M | if (subQ.checkInterference()) |
732 | 154M | break; |
733 | 154M | } |
734 | 154M | // If no units have interference, break out with the current PhysReg. |
735 | 154M | if (!Units.isValid()) |
736 | 129k | break; |
737 | 158M | } |
738 | 4.78M | if (PhysReg) |
739 | 4.78M | DEBUG(dbgs() << "can reassign: " << VirtReg << " from " |
740 | 4.78M | << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) |
741 | 4.78M | << '\n'); |
742 | 4.78M | return PhysReg; |
743 | 4.78M | } |
744 | | |
745 | | /// shouldEvict - determine if A should evict the assigned live range B. The |
746 | | /// eviction policy defined by this function together with the allocation order |
747 | | /// defined by enqueue() decides which registers ultimately end up being split |
748 | | /// and spilled. |
749 | | /// |
750 | | /// Cascade numbers are used to prevent infinite loops if this function is a |
751 | | /// cyclic relation. |
752 | | /// |
753 | | /// @param A The live range to be assigned. |
754 | | /// @param IsHint True when A is about to be assigned to its preferred |
755 | | /// register. |
756 | | /// @param B The live range to be evicted. |
757 | | /// @param BreaksHint True when B is already assigned to its preferred register. |
758 | | bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, |
759 | 11.1M | LiveInterval &B, bool BreaksHint) { |
760 | 11.1M | bool CanSplit = getStage(B) < RS_Spill; |
761 | 11.1M | |
762 | 11.1M | // Be fairly aggressive about following hints as long as the evictee can be |
763 | 11.1M | // split. |
764 | 11.1M | if (CanSplit && 11.1M IsHint11.1M && !BreaksHint2.88k ) |
765 | 2.88k | return true; |
766 | 11.1M | |
767 | 11.1M | if (11.1M A.weight > B.weight11.1M ) { |
768 | 5.71M | DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); |
769 | 5.71M | return true; |
770 | 5.71M | } |
771 | 5.48M | return false; |
772 | 5.48M | } |
773 | | |
774 | | /// canEvictInterference - Return true if all interferences between VirtReg and |
775 | | /// PhysReg can be evicted. |
776 | | /// |
777 | | /// @param VirtReg Live range that is about to be assigned. |
778 | | /// @param PhysReg Desired register for assignment. |
779 | | /// @param IsHint True when PhysReg is VirtReg's preferred register. |
780 | | /// @param MaxCost Only look for cheaper candidates and update with new cost |
781 | | /// when returning true. |
782 | | /// @returns True when interference can be evicted cheaper than MaxCost. |
783 | | bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, |
784 | 29.3M | bool IsHint, EvictionCost &MaxCost) { |
785 | 29.3M | // It is only possible to evict virtual register interference. |
786 | 29.3M | if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) |
787 | 11.4M | return false; |
788 | 17.8M | |
789 | 17.8M | bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); |
790 | 17.8M | |
791 | 17.8M | // Find VirtReg's cascade number. This will be unassigned if VirtReg was never |
792 | 17.8M | // involved in an eviction before. If a cascade number was assigned, deny |
793 | 17.8M | // evicting anything with the same or a newer cascade number. This prevents |
794 | 17.8M | // infinite eviction loops. |
795 | 17.8M | // |
796 | 17.8M | // This works out so a register without a cascade number is allowed to evict |
797 | 17.8M | // anything, and it can be evicted by anything. |
798 | 17.8M | unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; |
799 | 17.8M | if (!Cascade) |
800 | 5.10M | Cascade = NextCascade; |
801 | 17.8M | |
802 | 17.8M | EvictionCost Cost; |
803 | 18.7M | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid()18.7M ; ++Units891k ) { |
804 | 17.8M | LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); |
805 | 17.8M | // If there is 10 or more interferences, chances are one is heavier. |
806 | 17.8M | if (Q.collectInterferingVRegs(10) >= 10) |
807 | 198k | return false; |
808 | 17.6M | |
809 | 17.6M | // Check if any interfering live range is heavier than MaxWeight. |
810 | 18.7M | for (unsigned i = Q.interferingVRegs().size(); 17.6M i18.7M ; --i1.07M ) { |
811 | 17.8M | LiveInterval *Intf = Q.interferingVRegs()[i - 1]; |
812 | 17.8M | assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && |
813 | 17.8M | "Only expecting virtual register interference from query"); |
814 | 17.8M | // Never evict spill products. They cannot split or spill. |
815 | 17.8M | if (getStage(*Intf) == RS_Done) |
816 | 18.2k | return false; |
817 | 17.8M | // Once a live range becomes small enough, it is urgent that we find a |
818 | 17.8M | // register for it. This is indicated by an infinite spill weight. These |
819 | 17.8M | // urgent live ranges get to evict almost anything. |
820 | 17.8M | // |
821 | 17.8M | // Also allow urgent evictions of unspillable ranges from a strictly |
822 | 17.8M | // larger allocation order. |
823 | 17.8M | bool Urgent = !VirtReg.isSpillable() && |
824 | 64.6k | (Intf->isSpillable() || |
825 | 818 | RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < |
826 | 64.6k | RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); |
827 | 17.8M | // Only evict older cascades or live ranges without a cascade. |
828 | 17.8M | unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; |
829 | 17.8M | if (Cascade <= IntfCascade17.8M ) { |
830 | 4.40M | if (!Urgent) |
831 | 4.40M | return false; |
832 | 42 | // We permit breaking cascades for urgent evictions. It should be the |
833 | 42 | // last resort, though, so make it really expensive. |
834 | 42 | Cost.BrokenHints += 10; |
835 | 42 | } |
836 | 17.8M | // Would this break a satisfied hint? |
837 | 13.4M | bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); |
838 | 13.4M | // Update eviction cost. |
839 | 13.4M | Cost.BrokenHints += BreaksHint; |
840 | 13.4M | Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); |
841 | 13.4M | // Abort if this would be too expensive. |
842 | 13.4M | if (!(Cost < MaxCost)) |
843 | 2.23M | return false; |
844 | 11.2M | if (11.2M Urgent11.2M ) |
845 | 17.9k | continue; |
846 | 11.1M | // Apply the eviction policy for non-urgent evictions. |
847 | 11.1M | if (11.1M !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)11.1M ) |
848 | 5.48M | return false; |
849 | 5.71M | // If !MaxCost.isMax(), then we're just looking for a cheap register. |
850 | 5.71M | // Evicting another local live range in this case could lead to suboptimal |
851 | 5.71M | // coloring. |
852 | 5.71M | if (5.71M !MaxCost.isMax() && 5.71M IsLocal5.05M && LIS->intervalIsInOneMBB(*Intf)4.81M && |
853 | 5.71M | (!EnableLocalReassign || 4.78M !canReassign(*Intf, PhysReg)4.78M )) { |
854 | 4.65M | return false; |
855 | 4.65M | } |
856 | 17.8M | } |
857 | 17.8M | } |
858 | 868k | MaxCost = Cost; |
859 | 868k | return true; |
860 | 29.3M | } |
861 | | |
862 | | /// evictInterference - Evict any interferring registers that prevent VirtReg |
863 | | /// from being assigned to Physreg. This assumes that canEvictInterference |
864 | | /// returned true. |
865 | | void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, |
866 | 568k | SmallVectorImpl<unsigned> &NewVRegs) { |
867 | 568k | // Make sure that VirtReg has a cascade number, and assign that cascade |
868 | 568k | // number to every evicted register. These live ranges than then only be |
869 | 568k | // evicted by a newer cascade, preventing infinite loops. |
870 | 568k | unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; |
871 | 568k | if (!Cascade) |
872 | 253k | Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; |
873 | 568k | |
874 | 568k | DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) |
875 | 568k | << " interference: Cascade " << Cascade << '\n'); |
876 | 568k | |
877 | 568k | // Collect all interfering virtregs first. |
878 | 568k | SmallVector<LiveInterval*, 8> Intfs; |
879 | 1.15M | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid()1.15M ; ++Units582k ) { |
880 | 582k | LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); |
881 | 582k | // We usually have the interfering VRegs cached so collectInterferingVRegs() |
882 | 582k | // should be fast, we may need to recalculate if when different physregs |
883 | 582k | // overlap the same register unit so we had different SubRanges queried |
884 | 582k | // against it. |
885 | 582k | Q.collectInterferingVRegs(); |
886 | 582k | ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); |
887 | 582k | Intfs.append(IVR.begin(), IVR.end()); |
888 | 582k | } |
889 | 568k | |
890 | 568k | // Evict them second. This will invalidate the queries. |
891 | 1.15M | for (unsigned i = 0, e = Intfs.size(); i != e1.15M ; ++i584k ) { |
892 | 584k | LiveInterval *Intf = Intfs[i]; |
893 | 584k | // The same VirtReg may be present in multiple RegUnits. Skip duplicates. |
894 | 584k | if (!VRM->hasPhys(Intf->reg)) |
895 | 12.6k | continue; |
896 | 571k | Matrix->unassign(*Intf); |
897 | 571k | assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || |
898 | 571k | VirtReg.isSpillable() < Intf->isSpillable()) && |
899 | 571k | "Cannot decrease cascade number, illegal eviction"); |
900 | 571k | ExtraRegInfo[Intf->reg].Cascade = Cascade; |
901 | 571k | ++NumEvicted; |
902 | 571k | NewVRegs.push_back(Intf->reg); |
903 | 571k | } |
904 | 568k | } |
905 | | |
906 | | /// Returns true if the given \p PhysReg is a callee saved register and has not |
907 | | /// been used for allocation yet. |
908 | 7.20M | bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { |
909 | 7.20M | unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); |
910 | 7.20M | if (CSR == 0) |
911 | 5.35M | return false; |
912 | 1.84M | |
913 | 1.84M | return !Matrix->isPhysRegUsed(PhysReg); |
914 | 1.84M | } |
915 | | |
916 | | /// tryEvict - Try to evict all interferences for a physreg. |
917 | | /// @param VirtReg Currently unassigned virtual register. |
918 | | /// @param Order Physregs to try. |
919 | | /// @return Physreg to assign VirtReg, or 0. |
920 | | unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, |
921 | | AllocationOrder &Order, |
922 | | SmallVectorImpl<unsigned> &NewVRegs, |
923 | 1.00M | unsigned CostPerUseLimit) { |
924 | 1.00M | NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, |
925 | 1.00M | TimePassesIsEnabled); |
926 | 1.00M | |
927 | 1.00M | // Keep track of the cheapest interference seen so far. |
928 | 1.00M | EvictionCost BestCost; |
929 | 1.00M | BestCost.setMax(); |
930 | 1.00M | unsigned BestPhys = 0; |
931 | 1.00M | unsigned OrderLimit = Order.getOrder().size(); |
932 | 1.00M | |
933 | 1.00M | // When we are just looking for a reduced cost per use, don't break any |
934 | 1.00M | // hints, and only evict smaller spill weights. |
935 | 1.00M | if (CostPerUseLimit < ~0u1.00M ) { |
936 | 43.6k | BestCost.BrokenHints = 0; |
937 | 43.6k | BestCost.MaxWeight = VirtReg.weight; |
938 | 43.6k | |
939 | 43.6k | // Check of any registers in RC are below CostPerUseLimit. |
940 | 43.6k | const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); |
941 | 43.6k | unsigned MinCost = RegClassInfo.getMinCost(RC); |
942 | 43.6k | if (MinCost >= CostPerUseLimit43.6k ) { |
943 | 1 | DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost |
944 | 1 | << ", no cheaper registers to be found.\n"); |
945 | 1 | return 0; |
946 | 1 | } |
947 | 43.6k | |
948 | 43.6k | // It is normal for register classes to have a long tail of registers with |
949 | 43.6k | // the same cost. We don't need to look at them if they're too expensive. |
950 | 43.6k | if (43.6k TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit43.6k ) { |
951 | 39.6k | OrderLimit = RegClassInfo.getLastCostChange(RC); |
952 | 39.6k | DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); |
953 | 39.6k | } |
954 | 43.6k | } |
955 | 1.00M | |
956 | 1.00M | Order.rewind(); |
957 | 29.1M | while (unsigned PhysReg29.1M = Order.next(OrderLimit)) { |
958 | 28.1M | if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) |
959 | 143k | continue; |
960 | 28.0M | // The first use of a callee-saved register in a function has cost 1. |
961 | 28.0M | // Don't start using a CSR when the CostPerUseLimit is low. |
962 | 28.0M | if (28.0M CostPerUseLimit == 1 && 28.0M isUnusedCalleeSavedReg(PhysReg)303k ) { |
963 | 31.8k | DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " |
964 | 31.8k | << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) |
965 | 31.8k | << '\n'); |
966 | 31.8k | continue; |
967 | 31.8k | } |
968 | 27.9M | |
969 | 27.9M | if (27.9M !canEvictInterference(VirtReg, PhysReg, false, BestCost)27.9M ) |
970 | 27.1M | continue; |
971 | 867k | |
972 | 867k | // Best so far. |
973 | 867k | BestPhys = PhysReg; |
974 | 867k | |
975 | 867k | // Stop if the hint can be used. |
976 | 867k | if (Order.isHint()) |
977 | 6.22k | break; |
978 | 28.1M | } |
979 | 1.00M | |
980 | 1.00M | if (!BestPhys) |
981 | 434k | return 0; |
982 | 567k | |
983 | 567k | evictInterference(VirtReg, BestPhys, NewVRegs); |
984 | 567k | return BestPhys; |
985 | 567k | } |
986 | | |
987 | | //===----------------------------------------------------------------------===// |
988 | | // Region Splitting |
989 | | //===----------------------------------------------------------------------===// |
990 | | |
991 | | /// addSplitConstraints - Fill out the SplitConstraints vector based on the |
992 | | /// interference pattern in Physreg and its aliases. Add the constraints to |
993 | | /// SpillPlacement and return the static cost of this split in Cost, assuming |
994 | | /// that all preferences in SplitConstraints are met. |
995 | | /// Return false if there are no bundles with positive bias. |
996 | | bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, |
997 | 9.40M | BlockFrequency &Cost) { |
998 | 9.40M | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
999 | 9.40M | |
1000 | 9.40M | // Reset interference dependent info. |
1001 | 9.40M | SplitConstraints.resize(UseBlocks.size()); |
1002 | 9.40M | BlockFrequency StaticCost = 0; |
1003 | 49.2M | for (unsigned i = 0; i != UseBlocks.size()49.2M ; ++i39.8M ) { |
1004 | 39.8M | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; |
1005 | 39.8M | SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; |
1006 | 39.8M | |
1007 | 39.8M | BC.Number = BI.MBB->getNumber(); |
1008 | 39.8M | Intf.moveToBlock(BC.Number); |
1009 | 39.8M | BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg28.4M : SpillPlacement::DontCare11.3M ; |
1010 | 39.8M | BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg33.3M : SpillPlacement::DontCare6.53M ; |
1011 | 39.8M | BC.ChangesValue = BI.FirstDef.isValid(); |
1012 | 39.8M | |
1013 | 39.8M | if (!Intf.hasInterference()) |
1014 | 17.4M | continue; |
1015 | 22.3M | |
1016 | 22.3M | // Number of spill code instructions to insert. |
1017 | 22.3M | unsigned Ins = 0; |
1018 | 22.3M | |
1019 | 22.3M | // Interference for the live-in value. |
1020 | 22.3M | if (BI.LiveIn22.3M ) { |
1021 | 16.5M | if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)16.5M ) { |
1022 | 5.08M | BC.Entry = SpillPlacement::MustSpill; |
1023 | 5.08M | ++Ins; |
1024 | 16.5M | } else if (11.4M Intf.first() < BI.FirstInstr11.4M ) { |
1025 | 3.19M | BC.Entry = SpillPlacement::PrefSpill; |
1026 | 3.19M | ++Ins; |
1027 | 11.4M | } else if (8.28M Intf.first() < BI.LastInstr8.28M ) { |
1028 | 754k | ++Ins; |
1029 | 754k | } |
1030 | 16.5M | } |
1031 | 22.3M | |
1032 | 22.3M | // Interference for the live-out value. |
1033 | 22.3M | if (BI.LiveOut22.3M ) { |
1034 | 18.1M | if (Intf.last() >= SA->getLastSplitPoint(BC.Number)18.1M ) { |
1035 | 6.97M | BC.Exit = SpillPlacement::MustSpill; |
1036 | 6.97M | ++Ins; |
1037 | 18.1M | } else if (11.1M Intf.last() > BI.LastInstr11.1M ) { |
1038 | 8.06M | BC.Exit = SpillPlacement::PrefSpill; |
1039 | 8.06M | ++Ins; |
1040 | 11.1M | } else if (3.12M Intf.last() > BI.FirstInstr3.12M ) { |
1041 | 704k | ++Ins; |
1042 | 704k | } |
1043 | 18.1M | } |
1044 | 22.3M | |
1045 | 22.3M | // Accumulate the total frequency of inserted spill code. |
1046 | 47.1M | while (Ins--) |
1047 | 24.7M | StaticCost += SpillPlacer->getBlockFrequency(BC.Number); |
1048 | 39.8M | } |
1049 | 9.40M | Cost = StaticCost; |
1050 | 9.40M | |
1051 | 9.40M | // Add constraints for use-blocks. Note that these are the only constraints |
1052 | 9.40M | // that may add a positive bias, it is downhill from here. |
1053 | 9.40M | SpillPlacer->addConstraints(SplitConstraints); |
1054 | 9.40M | return SpillPlacer->scanActiveBundles(); |
1055 | 9.40M | } |
1056 | | |
1057 | | /// addThroughConstraints - Add constraints and links to SpillPlacer from the |
1058 | | /// live-through blocks in Blocks. |
1059 | | void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, |
1060 | 17.2M | ArrayRef<unsigned> Blocks) { |
1061 | 17.2M | const unsigned GroupSize = 8; |
1062 | 17.2M | SpillPlacement::BlockConstraint BCS[GroupSize]; |
1063 | 17.2M | unsigned TBS[GroupSize]; |
1064 | 17.2M | unsigned B = 0, T = 0; |
1065 | 17.2M | |
1066 | 149M | for (unsigned i = 0; i != Blocks.size()149M ; ++i131M ) { |
1067 | 131M | unsigned Number = Blocks[i]; |
1068 | 131M | Intf.moveToBlock(Number); |
1069 | 131M | |
1070 | 131M | if (!Intf.hasInterference()131M ) { |
1071 | 107M | assert(T < GroupSize && "Array overflow"); |
1072 | 107M | TBS[T] = Number; |
1073 | 107M | if (++T == GroupSize107M ) { |
1074 | 7.19M | SpillPlacer->addLinks(makeArrayRef(TBS, T)); |
1075 | 7.19M | T = 0; |
1076 | 7.19M | } |
1077 | 107M | continue; |
1078 | 107M | } |
1079 | 24.8M | |
1080 | 131M | assert(B < GroupSize && "Array overflow"); |
1081 | 24.8M | BCS[B].Number = Number; |
1082 | 24.8M | |
1083 | 24.8M | // Interference for the live-in value. |
1084 | 24.8M | if (Intf.first() <= Indexes->getMBBStartIdx(Number)) |
1085 | 3.19M | BCS[B].Entry = SpillPlacement::MustSpill; |
1086 | 24.8M | else |
1087 | 21.6M | BCS[B].Entry = SpillPlacement::PrefSpill; |
1088 | 24.8M | |
1089 | 24.8M | // Interference for the live-out value. |
1090 | 24.8M | if (Intf.last() >= SA->getLastSplitPoint(Number)) |
1091 | 4.58M | BCS[B].Exit = SpillPlacement::MustSpill; |
1092 | 24.8M | else |
1093 | 20.2M | BCS[B].Exit = SpillPlacement::PrefSpill; |
1094 | 24.8M | |
1095 | 24.8M | if (++B == GroupSize24.8M ) { |
1096 | 606k | SpillPlacer->addConstraints(makeArrayRef(BCS, B)); |
1097 | 606k | B = 0; |
1098 | 606k | } |
1099 | 131M | } |
1100 | 17.2M | |
1101 | 17.2M | SpillPlacer->addConstraints(makeArrayRef(BCS, B)); |
1102 | 17.2M | SpillPlacer->addLinks(makeArrayRef(TBS, T)); |
1103 | 17.2M | } |
1104 | | |
1105 | 4.38M | void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { |
1106 | 4.38M | // Keep track of through blocks that have not been added to SpillPlacer. |
1107 | 4.38M | BitVector Todo = SA->getThroughBlocks(); |
1108 | 4.38M | SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; |
1109 | 4.38M | unsigned AddedTo = 0; |
1110 | | #ifndef NDEBUG |
1111 | | unsigned Visited = 0; |
1112 | | #endif |
1113 | | |
1114 | 21.8M | while (true21.8M ) { |
1115 | 21.8M | ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); |
1116 | 21.8M | // Find new through blocks in the periphery of PrefRegBundles. |
1117 | 81.8M | for (int i = 0, e = NewBundles.size(); i != e81.8M ; ++i59.9M ) { |
1118 | 59.9M | unsigned Bundle = NewBundles[i]; |
1119 | 59.9M | // Look at all blocks connected to Bundle in the full graph. |
1120 | 59.9M | ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); |
1121 | 59.9M | for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); |
1122 | 308M | I != E308M ; ++I248M ) { |
1123 | 248M | unsigned Block = *I; |
1124 | 248M | if (!Todo.test(Block)) |
1125 | 114M | continue; |
1126 | 134M | Todo.reset(Block); |
1127 | 134M | // This is a new through block. Add it to SpillPlacer later. |
1128 | 134M | ActiveBlocks.push_back(Block); |
1129 | | #ifndef NDEBUG |
1130 | | ++Visited; |
1131 | | #endif |
1132 | | } |
1133 | 59.9M | } |
1134 | 21.8M | // Any new blocks to add? |
1135 | 21.8M | if (ActiveBlocks.size() == AddedTo) |
1136 | 4.38M | break; |
1137 | 17.4M | |
1138 | 17.4M | // Compute through constraints from the interference, or assume that all |
1139 | 17.4M | // through blocks prefer spilling when forming compact regions. |
1140 | 17.4M | auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); |
1141 | 17.4M | if (Cand.PhysReg) |
1142 | 17.2M | addThroughConstraints(Cand.Intf, NewBlocks); |
1143 | 17.4M | else |
1144 | 17.4M | // Provide a strong negative bias on through blocks to prevent unwanted |
1145 | 17.4M | // liveness on loop backedges. |
1146 | 212k | SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); |
1147 | 21.8M | AddedTo = ActiveBlocks.size(); |
1148 | 21.8M | |
1149 | 21.8M | // Perhaps iterating can enable more bundles? |
1150 | 21.8M | SpillPlacer->iterate(); |
1151 | 21.8M | } |
1152 | 4.38M | DEBUG(dbgs() << ", v=" << Visited); |
1153 | 4.38M | } |
1154 | | |
1155 | | /// calcCompactRegion - Compute the set of edge bundles that should be live |
1156 | | /// when splitting the current live range into compact regions. Compact |
1157 | | /// regions can be computed without looking at interference. They are the |
1158 | | /// regions formed by removing all the live-through blocks from the live range. |
1159 | | /// |
1160 | | /// Returns false if the current live range is already compact, or if the |
1161 | | /// compact regions would form single block regions anyway. |
1162 | 225k | bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { |
1163 | 225k | // Without any through blocks, the live range is already compact. |
1164 | 225k | if (!SA->getNumThroughBlocks()) |
1165 | 11.2k | return false; |
1166 | 213k | |
1167 | 213k | // Compact regions don't correspond to any physreg. |
1168 | 213k | Cand.reset(IntfCache, 0); |
1169 | 213k | |
1170 | 213k | DEBUG(dbgs() << "Compact region bundles"); |
1171 | 213k | |
1172 | 213k | // Use the spill placer to determine the live bundles. GrowRegion pretends |
1173 | 213k | // that all the through blocks have interference when PhysReg is unset. |
1174 | 213k | SpillPlacer->prepare(Cand.LiveBundles); |
1175 | 213k | |
1176 | 213k | // The static split cost will be zero since Cand.Intf reports no interference. |
1177 | 213k | BlockFrequency Cost; |
1178 | 213k | if (!addSplitConstraints(Cand.Intf, Cost)213k ) { |
1179 | 768 | DEBUG(dbgs() << ", none.\n"); |
1180 | 768 | return false; |
1181 | 768 | } |
1182 | 213k | |
1183 | 213k | growRegion(Cand); |
1184 | 213k | SpillPlacer->finish(); |
1185 | 213k | |
1186 | 213k | if (!Cand.LiveBundles.any()213k ) { |
1187 | 158k | DEBUG(dbgs() << ", none.\n"); |
1188 | 158k | return false; |
1189 | 158k | } |
1190 | 54.6k | |
1191 | 54.6k | DEBUG54.6k ({ |
1192 | 54.6k | for (int i : Cand.LiveBundles.set_bits()) |
1193 | 54.6k | dbgs() << " EB#" << i; |
1194 | 54.6k | dbgs() << ".\n"; |
1195 | 54.6k | }); |
1196 | 54.6k | return true; |
1197 | 54.6k | } |
1198 | | |
1199 | | /// calcSpillCost - Compute how expensive it would be to split the live range in |
1200 | | /// SA around all use blocks instead of forming bundle regions. |
1201 | 170k | BlockFrequency RAGreedy::calcSpillCost() { |
1202 | 170k | BlockFrequency Cost = 0; |
1203 | 170k | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
1204 | 615k | for (unsigned i = 0; i != UseBlocks.size()615k ; ++i445k ) { |
1205 | 445k | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; |
1206 | 445k | unsigned Number = BI.MBB->getNumber(); |
1207 | 445k | // We normally only need one spill instruction - a load or a store. |
1208 | 445k | Cost += SpillPlacer->getBlockFrequency(Number); |
1209 | 445k | |
1210 | 445k | // Unless the value is redefined in the block. |
1211 | 445k | if (BI.LiveIn && 445k BI.LiveOut264k && BI.FirstDef213k ) |
1212 | 2.55k | Cost += SpillPlacer->getBlockFrequency(Number); |
1213 | 445k | } |
1214 | 170k | return Cost; |
1215 | 170k | } |
1216 | | |
1217 | | /// calcGlobalSplitCost - Return the global split cost of following the split |
1218 | | /// pattern in LiveBundles. This cost should be added to the local cost of the |
1219 | | /// interference pattern in SplitConstraints. |
1220 | | /// |
1221 | 3.62M | BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { |
1222 | 3.62M | BlockFrequency GlobalCost = 0; |
1223 | 3.62M | const BitVector &LiveBundles = Cand.LiveBundles; |
1224 | 3.62M | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
1225 | 19.2M | for (unsigned i = 0; i != UseBlocks.size()19.2M ; ++i15.6M ) { |
1226 | 15.6M | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; |
1227 | 15.6M | SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; |
1228 | 15.6M | bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; |
1229 | 15.6M | bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; |
1230 | 15.6M | unsigned Ins = 0; |
1231 | 15.6M | |
1232 | 15.6M | if (BI.LiveIn) |
1233 | 11.5M | Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); |
1234 | 15.6M | if (BI.LiveOut) |
1235 | 13.1M | Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); |
1236 | 20.8M | while (Ins--) |
1237 | 5.18M | GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); |
1238 | 15.6M | } |
1239 | 3.62M | |
1240 | 129M | for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e129M ; ++i126M ) { |
1241 | 126M | unsigned Number = Cand.ActiveBlocks[i]; |
1242 | 126M | bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; |
1243 | 126M | bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; |
1244 | 126M | if (!RegIn && 126M !RegOut40.8M ) |
1245 | 27.3M | continue; |
1246 | 98.8M | if (98.8M RegIn && 98.8M RegOut85.4M ) { |
1247 | 76.3M | // We need double spill code if this block has interference. |
1248 | 76.3M | Cand.Intf.moveToBlock(Number); |
1249 | 76.3M | if (Cand.Intf.hasInterference()76.3M ) { |
1250 | 3.89M | GlobalCost += SpillPlacer->getBlockFrequency(Number); |
1251 | 3.89M | GlobalCost += SpillPlacer->getBlockFrequency(Number); |
1252 | 3.89M | } |
1253 | 76.3M | continue; |
1254 | 76.3M | } |
1255 | 22.5M | // live-in / stack-out or stack-in live-out. |
1256 | 22.5M | GlobalCost += SpillPlacer->getBlockFrequency(Number); |
1257 | 22.5M | } |
1258 | 3.62M | return GlobalCost; |
1259 | 3.62M | } |
1260 | | |
1261 | | /// splitAroundRegion - Split the current live range around the regions |
1262 | | /// determined by BundleCand and GlobalCand. |
1263 | | /// |
1264 | | /// Before calling this function, GlobalCand and BundleCand must be initialized |
1265 | | /// so each bundle is assigned to a valid candidate, or NoCand for the |
1266 | | /// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor |
1267 | | /// objects must be initialized for the current live range, and intervals |
1268 | | /// created for the used candidates. |
1269 | | /// |
1270 | | /// @param LREdit The LiveRangeEdit object handling the current split. |
1271 | | /// @param UsedCands List of used GlobalCand entries. Every BundleCand value |
1272 | | /// must appear in this list. |
1273 | | void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, |
1274 | 80.5k | ArrayRef<unsigned> UsedCands) { |
1275 | 80.5k | // These are the intervals created for new global ranges. We may create more |
1276 | 80.5k | // intervals for local ranges. |
1277 | 80.5k | const unsigned NumGlobalIntvs = LREdit.size(); |
1278 | 80.5k | DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); |
1279 | 80.5k | assert(NumGlobalIntvs && "No global intervals configured"); |
1280 | 80.5k | |
1281 | 80.5k | // Isolate even single instructions when dealing with a proper sub-class. |
1282 | 80.5k | // That guarantees register class inflation for the stack interval because it |
1283 | 80.5k | // is all copies. |
1284 | 80.5k | unsigned Reg = SA->getParent().reg; |
1285 | 80.5k | bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); |
1286 | 80.5k | |
1287 | 80.5k | // First handle all the blocks with uses. |
1288 | 80.5k | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
1289 | 560k | for (unsigned i = 0; i != UseBlocks.size()560k ; ++i480k ) { |
1290 | 480k | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; |
1291 | 480k | unsigned Number = BI.MBB->getNumber(); |
1292 | 480k | unsigned IntvIn = 0, IntvOut = 0; |
1293 | 480k | SlotIndex IntfIn, IntfOut; |
1294 | 480k | if (BI.LiveIn480k ) { |
1295 | 383k | unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; |
1296 | 383k | if (CandIn != NoCand383k ) { |
1297 | 315k | GlobalSplitCandidate &Cand = GlobalCand[CandIn]; |
1298 | 315k | IntvIn = Cand.IntvIdx; |
1299 | 315k | Cand.Intf.moveToBlock(Number); |
1300 | 315k | IntfIn = Cand.Intf.first(); |
1301 | 315k | } |
1302 | 383k | } |
1303 | 480k | if (BI.LiveOut480k ) { |
1304 | 391k | unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; |
1305 | 391k | if (CandOut != NoCand391k ) { |
1306 | 332k | GlobalSplitCandidate &Cand = GlobalCand[CandOut]; |
1307 | 332k | IntvOut = Cand.IntvIdx; |
1308 | 332k | Cand.Intf.moveToBlock(Number); |
1309 | 332k | IntfOut = Cand.Intf.last(); |
1310 | 332k | } |
1311 | 391k | } |
1312 | 480k | |
1313 | 480k | // Create separate intervals for isolated blocks with multiple uses. |
1314 | 480k | if (!IntvIn && 480k !IntvOut165k ) { |
1315 | 58.1k | DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); |
1316 | 58.1k | if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) |
1317 | 14.4k | SE->splitSingleBlock(BI); |
1318 | 58.1k | continue; |
1319 | 58.1k | } |
1320 | 422k | |
1321 | 422k | if (422k IntvIn && 422k IntvOut315k ) |
1322 | 225k | SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); |
1323 | 196k | else if (196k IntvIn196k ) |
1324 | 89.6k | SE->splitRegInBlock(BI, IntvIn, IntfIn); |
1325 | 196k | else |
1326 | 106k | SE->splitRegOutBlock(BI, IntvOut, IntfOut); |
1327 | 480k | } |
1328 | 80.5k | |
1329 | 80.5k | // Handle live-through blocks. The relevant live-through blocks are stored in |
1330 | 80.5k | // the ActiveBlocks list with each candidate. We need to filter out |
1331 | 80.5k | // duplicates. |
1332 | 80.5k | BitVector Todo = SA->getThroughBlocks(); |
1333 | 166k | for (unsigned c = 0; c != UsedCands.size()166k ; ++c86.4k ) { |
1334 | 86.4k | ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; |
1335 | 2.68M | for (unsigned i = 0, e = Blocks.size(); i != e2.68M ; ++i2.60M ) { |
1336 | 2.60M | unsigned Number = Blocks[i]; |
1337 | 2.60M | if (!Todo.test(Number)) |
1338 | 112k | continue; |
1339 | 2.48M | Todo.reset(Number); |
1340 | 2.48M | |
1341 | 2.48M | unsigned IntvIn = 0, IntvOut = 0; |
1342 | 2.48M | SlotIndex IntfIn, IntfOut; |
1343 | 2.48M | |
1344 | 2.48M | unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; |
1345 | 2.48M | if (CandIn != NoCand2.48M ) { |
1346 | 1.89M | GlobalSplitCandidate &Cand = GlobalCand[CandIn]; |
1347 | 1.89M | IntvIn = Cand.IntvIdx; |
1348 | 1.89M | Cand.Intf.moveToBlock(Number); |
1349 | 1.89M | IntfIn = Cand.Intf.first(); |
1350 | 1.89M | } |
1351 | 2.48M | |
1352 | 2.48M | unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; |
1353 | 2.48M | if (CandOut != NoCand2.48M ) { |
1354 | 1.88M | GlobalSplitCandidate &Cand = GlobalCand[CandOut]; |
1355 | 1.88M | IntvOut = Cand.IntvIdx; |
1356 | 1.88M | Cand.Intf.moveToBlock(Number); |
1357 | 1.88M | IntfOut = Cand.Intf.last(); |
1358 | 1.88M | } |
1359 | 2.48M | if (!IntvIn && 2.48M !IntvOut594k ) |
1360 | 494k | continue; |
1361 | 1.99M | SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); |
1362 | 1.99M | } |
1363 | 86.4k | } |
1364 | 80.5k | |
1365 | 80.5k | ++NumGlobalSplits; |
1366 | 80.5k | |
1367 | 80.5k | SmallVector<unsigned, 8> IntvMap; |
1368 | 80.5k | SE->finish(&IntvMap); |
1369 | 80.5k | DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); |
1370 | 80.5k | |
1371 | 80.5k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
1372 | 80.5k | unsigned OrigBlocks = SA->getNumLiveBlocks(); |
1373 | 80.5k | |
1374 | 80.5k | // Sort out the new intervals created by splitting. We get four kinds: |
1375 | 80.5k | // - Remainder intervals should not be split again. |
1376 | 80.5k | // - Candidate intervals can be assigned to Cand.PhysReg. |
1377 | 80.5k | // - Block-local splits are candidates for local splitting. |
1378 | 80.5k | // - DCE leftovers should go back on the queue. |
1379 | 312k | for (unsigned i = 0, e = LREdit.size(); i != e312k ; ++i232k ) { |
1380 | 232k | LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); |
1381 | 232k | |
1382 | 232k | // Ignore old intervals from DCE. |
1383 | 232k | if (getStage(Reg) != RS_New) |
1384 | 0 | continue; |
1385 | 232k | |
1386 | 232k | // Remainder interval. Don't try splitting again, spill if it doesn't |
1387 | 232k | // allocate. |
1388 | 232k | if (232k IntvMap[i] == 0232k ) { |
1389 | 99.0k | setStage(Reg, RS_Spill); |
1390 | 99.0k | continue; |
1391 | 99.0k | } |
1392 | 133k | |
1393 | 133k | // Global intervals. Allow repeated splitting as long as the number of live |
1394 | 133k | // blocks is strictly decreasing. |
1395 | 133k | if (133k IntvMap[i] < NumGlobalIntvs133k ) { |
1396 | 105k | if (SA->countLiveBlocks(&Reg) >= OrigBlocks105k ) { |
1397 | 10.2k | DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks |
1398 | 10.2k | << " blocks as original.\n"); |
1399 | 10.2k | // Don't allow repeated splitting as a safe guard against looping. |
1400 | 10.2k | setStage(Reg, RS_Split2); |
1401 | 10.2k | } |
1402 | 105k | continue; |
1403 | 105k | } |
1404 | 232k | |
1405 | 232k | // Other intervals are treated as new. This includes local intervals created |
1406 | 232k | // for blocks with multiple uses, and anything created by DCE. |
1407 | 232k | } |
1408 | 80.5k | |
1409 | 80.5k | if (VerifyEnabled) |
1410 | 7 | MF->verify(this, "After splitting live range around region"); |
1411 | 80.5k | } |
1412 | | |
1413 | | unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, |
1414 | 225k | SmallVectorImpl<unsigned> &NewVRegs) { |
1415 | 225k | unsigned NumCands = 0; |
1416 | 225k | BlockFrequency BestCost; |
1417 | 225k | |
1418 | 225k | // Check if we can split this live range around a compact region. |
1419 | 225k | bool HasCompact = calcCompactRegion(GlobalCand.front()); |
1420 | 225k | if (HasCompact225k ) { |
1421 | 54.6k | // Yes, keep GlobalCand[0] as the compact region candidate. |
1422 | 54.6k | NumCands = 1; |
1423 | 54.6k | BestCost = BlockFrequency::getMaxFrequency(); |
1424 | 225k | } else { |
1425 | 170k | // No benefit from the compact region, our fallback will be per-block |
1426 | 170k | // splitting. Make sure we find a solution that is cheaper than spilling. |
1427 | 170k | BestCost = calcSpillCost(); |
1428 | 170k | DEBUG(dbgs() << "Cost of isolating all blocks = "; |
1429 | 170k | MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); |
1430 | 170k | } |
1431 | 225k | |
1432 | 225k | unsigned BestCand = |
1433 | 225k | calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, |
1434 | 225k | false/*IgnoreCSR*/); |
1435 | 225k | |
1436 | 225k | // No solutions found, fall back to single block splitting. |
1437 | 225k | if (!HasCompact && 225k BestCand == NoCand170k ) |
1438 | 145k | return 0; |
1439 | 79.8k | |
1440 | 79.8k | return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); |
1441 | 79.8k | } |
1442 | | |
/// calculateRegionSplitCost - Evaluate a region-split candidate for every
/// physical register in \p Order and remember the cheapest one in GlobalCand.
///
/// \param VirtReg    the live range being split (only used via SA/state).
/// \param Order      allocation order supplying candidate physregs.
/// \param BestCost   in: cost to beat; out: cost of the best candidate found.
/// \param NumCands   in/out: number of live entries in GlobalCand (entry 0 may
///                   already hold a compact-region candidate).
/// \param IgnoreCSR  skip candidates that would be the first use of a
///                   callee-saved register.
/// \return index into GlobalCand of the best candidate, or NoCand.
unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      // Evict the candidate with the fewest live bundles (cheapest to redo),
      // never evicting the current best.
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned i = 0; i != NumCands; ++i) {
        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
        unsigned Count = GlobalCand[i].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = i;
          WorstCount = Count;
        }
      }
      // Compact the array by moving the last candidate into the evicted slot,
      // fixing up BestCand if it was the one that moved.
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    // The static cost alone (block constraints, before growing the region) can
    // already disqualify the candidate.
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
          MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(Cand);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    // Add the dynamic (copy/interference) cost of the grown region.
    Cost += calcGlobalSplitCost(Cand);
    DEBUG({
      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
             << " with bundles";
      for (int i : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }
  return BestCand;
}
1523 | | |
/// doRegionSplit - Perform the region split chosen by tryRegionSplit:
/// assign every edge bundle to the best candidate (and/or the compact region),
/// then let splitAroundRegion carve up the live range.
///
/// \param BestCand   index of the winning candidate in GlobalCand, or NoCand.
/// \param HasCompact true if GlobalCand[0] holds a valid compact region.
/// \return always 0; new registers are reported through NewVRegs.
unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    // getBundles returns the number of bundles claimed; 0 means the candidate
    // ended up covering nothing, so no interval is opened for it.
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region. Done second so the best candidate
  // keeps the bundles it claimed above.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                   << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}
1563 | | |
1564 | | //===----------------------------------------------------------------------===// |
1565 | | // Per-Block Splitting |
1566 | | //===----------------------------------------------------------------------===// |
1567 | | |
1568 | | /// tryBlockSplit - Split a global live range around every block with uses. This |
1569 | | /// creates a lot of local live ranges, that will be split by tryLocalSplit if |
1570 | | /// they don't allocate. |
1571 | | unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, |
1572 | 145k | SmallVectorImpl<unsigned> &NewVRegs) { |
1573 | 145k | assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); |
1574 | 145k | unsigned Reg = VirtReg.reg; |
1575 | 145k | bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); |
1576 | 145k | LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); |
1577 | 145k | SE->reset(LREdit, SplitSpillMode); |
1578 | 145k | ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); |
1579 | 496k | for (unsigned i = 0; i != UseBlocks.size()496k ; ++i350k ) { |
1580 | 350k | const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; |
1581 | 350k | if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) |
1582 | 32.2k | SE->splitSingleBlock(BI); |
1583 | 350k | } |
1584 | 145k | // No blocks were split. |
1585 | 145k | if (LREdit.empty()) |
1586 | 120k | return 0; |
1587 | 25.1k | |
1588 | 25.1k | // We did split for some blocks. |
1589 | 25.1k | SmallVector<unsigned, 8> IntvMap; |
1590 | 25.1k | SE->finish(&IntvMap); |
1591 | 25.1k | |
1592 | 25.1k | // Tell LiveDebugVariables about the new ranges. |
1593 | 25.1k | DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); |
1594 | 25.1k | |
1595 | 25.1k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
1596 | 25.1k | |
1597 | 25.1k | // Sort out the new intervals created by splitting. The remainder interval |
1598 | 25.1k | // goes straight to spilling, the new local ranges get to stay RS_New. |
1599 | 82.5k | for (unsigned i = 0, e = LREdit.size(); i != e82.5k ; ++i57.3k ) { |
1600 | 57.3k | LiveInterval &LI = LIS->getInterval(LREdit.get(i)); |
1601 | 57.3k | if (getStage(LI) == RS_New && 57.3k IntvMap[i] == 057.3k ) |
1602 | 25.1k | setStage(LI, RS_Spill); |
1603 | 57.3k | } |
1604 | 25.1k | |
1605 | 25.1k | if (VerifyEnabled) |
1606 | 17 | MF->verify(this, "After splitting live range around basic blocks"); |
1607 | 145k | return 0; |
1608 | 145k | } |
1609 | | |
1610 | | //===----------------------------------------------------------------------===// |
1611 | | // Per-Instruction Splitting |
1612 | | //===----------------------------------------------------------------------===// |
1613 | | |
1614 | | /// Get the number of allocatable registers that match the constraints of \p Reg |
1615 | | /// on \p MI and that are also in \p SuperRC. |
1616 | | static unsigned getNumAllocatableRegsForConstraints( |
1617 | | const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, |
1618 | | const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, |
1619 | 62 | const RegisterClassInfo &RCI) { |
1620 | 62 | assert(SuperRC && "Invalid register class"); |
1621 | 62 | |
1622 | 62 | const TargetRegisterClass *ConstrainedRC = |
1623 | 62 | MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, |
1624 | 62 | /* ExploreBundle */ true); |
1625 | 62 | if (!ConstrainedRC) |
1626 | 0 | return 0; |
1627 | 62 | return RCI.getNumAllocatableRegs(ConstrainedRC); |
1628 | 62 | } |
1629 | | |
1630 | | /// tryInstructionSplit - Split a live range around individual instructions. |
1631 | | /// This is normally not worthwhile since the spiller is doing essentially the |
1632 | | /// same thing. However, when the live range is in a constrained register |
1633 | | /// class, it may help to insert copies such that parts of the live range can |
1634 | | /// be moved to a larger register class. |
1635 | | /// |
1636 | | /// This is similar to spilling to a larger register class. |
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<unsigned> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
      // Skip full copies (they coalesce away anyway) and instructions whose
      // constrained class already allows as many registers as SuperRC —
      // splitting there would gain nothing.
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
                                                  TRI, RCI)) {
        DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
        continue;
      }
    // Open a tiny interval covering just this one use.
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
    SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Assign all new registers to RS_Spill. This was the last chance.
  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}
1692 | | |
1693 | | //===----------------------------------------------------------------------===// |
1694 | | // Local Splitting |
1695 | | //===----------------------------------------------------------------------===// |
1696 | | |
1697 | | /// calcGapWeights - Compute the maximum spill weight that needs to be evicted |
1698 | | /// in order to use PhysReg between two entries in SA->UseSlots. |
1699 | | /// |
1700 | | /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. |
1701 | | /// |
void RAGreedy::calcGapWeights(unsigned PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units] .find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI: record the interfering range's spill
      // weight as the eviction cost for each overlapped gap.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference. Fixed (reserved/physreg) live ranges can never be
  // evicted, so the overlapped gaps get an infinite weight.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
1776 | | |
1777 | | /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only |
1778 | | /// basic block. |
1779 | | /// |
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
                                   Uses.front().getRegSlot()) - RMS.begin();
    unsigned re = RMS.size();
    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
      // Look for Uses[i] <= RMS <= Uses[i+1].
      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
        break;
      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
      RegMaskGaps.push_back(i);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
        ++ri;
    }
    DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have less instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
    SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
    (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
        GapWeight[RegMaskGaps[i]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    // Sliding-window search over [SplitBefore, SplitAfter]: shrink when the
    // window is blocked by heavy interference, extend otherwise.
    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      // huge_valf marks gaps with fixed interference that can never be
      // evicted; such a window cannot be allocated at all.
      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        DEBUG(dbgs() << " w=" << EstWeight);
        // Hysteresis biases both the accept test and the recorded margin, so
        // marginally-better candidates don't cause flip-flopping.
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}
1999 | | |
2000 | | //===----------------------------------------------------------------------===// |
2001 | | // Live Range Splitting |
2002 | | //===----------------------------------------------------------------------===// |
2003 | | |
2004 | | /// trySplit - Try to split VirtReg or one of its interferences, making it |
2005 | | /// assignable. |
2006 | | /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. |
2007 | | unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, |
2008 | 314k | SmallVectorImpl<unsigned>&NewVRegs) { |
2009 | 314k | // Ranges must be Split2 or less. |
2010 | 314k | if (getStage(VirtReg) >= RS_Spill) |
2011 | 0 | return 0; |
2012 | 314k | |
2013 | 314k | // Local intervals are handled separately. |
2014 | 314k | if (314k LIS->intervalIsInOneMBB(VirtReg)314k ) { |
2015 | 88.4k | NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName, |
2016 | 88.4k | TimerGroupDescription, TimePassesIsEnabled); |
2017 | 88.4k | SA->analyze(&VirtReg); |
2018 | 88.4k | unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); |
2019 | 88.4k | if (PhysReg || 88.4k !NewVRegs.empty()88.4k ) |
2020 | 22.5k | return PhysReg; |
2021 | 65.9k | return tryInstructionSplit(VirtReg, Order, NewVRegs); |
2022 | 65.9k | } |
2023 | 225k | |
2024 | 225k | NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName, |
2025 | 225k | TimerGroupDescription, TimePassesIsEnabled); |
2026 | 225k | |
2027 | 225k | SA->analyze(&VirtReg); |
2028 | 225k | |
2029 | 225k | // FIXME: SplitAnalysis may repair broken live ranges coming from the |
2030 | 225k | // coalescer. That may cause the range to become allocatable which means that |
2031 | 225k | // tryRegionSplit won't be making progress. This check should be replaced with |
2032 | 225k | // an assertion when the coalescer is fixed. |
2033 | 225k | if (SA->didRepairRange()225k ) { |
2034 | 0 | // VirtReg has changed, so all cached queries are invalid. |
2035 | 0 | Matrix->invalidateVirtRegs(); |
2036 | 0 | if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) |
2037 | 0 | return PhysReg; |
2038 | 225k | } |
2039 | 225k | |
2040 | 225k | // First try to split around a region spanning multiple blocks. RS_Split2 |
2041 | 225k | // ranges already made dubious progress with region splitting, so they go |
2042 | 225k | // straight to single block splitting. |
2043 | 225k | if (225k getStage(VirtReg) < RS_Split2225k ) { |
2044 | 225k | unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); |
2045 | 225k | if (PhysReg || 225k !NewVRegs.empty()225k ) |
2046 | 79.8k | return PhysReg; |
2047 | 145k | } |
2048 | 145k | |
2049 | 145k | // Then isolate blocks. |
2050 | 145k | return tryBlockSplit(VirtReg, Order, NewVRegs); |
2051 | 145k | } |
2052 | | |
2053 | | //===----------------------------------------------------------------------===// |
2054 | | // Last Chance Recoloring |
2055 | | //===----------------------------------------------------------------------===// |
2056 | | |
2057 | | /// Return true if \p reg has any tied def operand. |
2058 | 97 | static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { |
2059 | 97 | for (const MachineOperand &MO : MRI->def_operands(reg)) |
2060 | 97 | if (97 MO.isTied()97 ) |
2061 | 0 | return true; |
2062 | 97 | |
2063 | 97 | return false; |
2064 | 97 | } |
2065 | | |
2066 | | /// mayRecolorAllInterferences - Check if the virtual registers that |
2067 | | /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be |
2068 | | /// recolored to free \p PhysReg. |
2069 | | /// When true is returned, \p RecoloringCandidates has been augmented with all |
2070 | | /// the live intervals that need to be recolored in order to free \p PhysReg |
2071 | | /// for \p VirtReg. |
2072 | | /// \p FixedRegisters contains all the virtual registers that cannot be |
2073 | | /// recolored. |
2074 | | bool |
2075 | | RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, |
2076 | | SmallLISet &RecoloringCandidates, |
2077 | 101 | const SmallVirtRegSet &FixedRegisters) { |
2078 | 101 | const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); |
2079 | 101 | |
2080 | 108 | for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid()108 ; ++Units7 ) { |
2081 | 104 | LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); |
2082 | 104 | // If there is LastChanceRecoloringMaxInterference or more interferences, |
2083 | 104 | // chances are one would not be recolorable. |
2084 | 104 | if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= |
2085 | 104 | LastChanceRecoloringMaxInterference && !ExhaustiveSearch0 ) { |
2086 | 0 | DEBUG(dbgs() << "Early abort: too many interferences.\n"); |
2087 | 0 | CutOffInfo |= CO_Interf; |
2088 | 0 | return false; |
2089 | 0 | } |
2090 | 111 | for (unsigned i = Q.interferingVRegs().size(); 104 i111 ; --i7 ) { |
2091 | 104 | LiveInterval *Intf = Q.interferingVRegs()[i - 1]; |
2092 | 104 | // If Intf is done and sit on the same register class as VirtReg, |
2093 | 104 | // it would not be recolorable as it is in the same state as VirtReg. |
2094 | 104 | // However, if VirtReg has tied defs and Intf doesn't, then |
2095 | 104 | // there is still a point in examining if it can be recolorable. |
2096 | 104 | if (((getStage(*Intf) == RS_Done && |
2097 | 97 | MRI->getRegClass(Intf->reg) == CurRC) && |
2098 | 97 | !(hasTiedDef(MRI, VirtReg.reg) && 97 !hasTiedDef(MRI, Intf->reg)0 )) || |
2099 | 104 | FixedRegisters.count(Intf->reg)7 ) { |
2100 | 97 | DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); |
2101 | 97 | return false; |
2102 | 97 | } |
2103 | 7 | RecoloringCandidates.insert(Intf); |
2104 | 7 | } |
2105 | 104 | } |
2106 | 4 | return true; |
2107 | 101 | } |
2108 | | |
2109 | | /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring |
2110 | | /// its interferences. |
2111 | | /// Last chance recoloring chooses a color for \p VirtReg and recolors every |
2112 | | /// virtual register that was using it. The recoloring process may recursively |
2113 | | /// use the last chance recoloring. Therefore, when a virtual register has been |
2114 | | /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot |
2115 | | /// be last-chance-recolored again during this recoloring "session". |
2116 | | /// E.g., |
2117 | | /// Let |
2118 | | /// vA can use {R1, R2 } |
2119 | | /// vB can use { R2, R3} |
2120 | | /// vC can use {R1 } |
2121 | | /// Where vA, vB, and vC cannot be split anymore (they are reloads for |
2122 | | /// instance) and they all interfere. |
2123 | | /// |
2124 | | /// vA is assigned R1 |
2125 | | /// vB is assigned R2 |
2126 | | /// vC tries to evict vA but vA is already done. |
2127 | | /// Regular register allocation fails. |
2128 | | /// |
2129 | | /// Last chance recoloring kicks in: |
2130 | | /// vC does as if vA was evicted => vC uses R1. |
2131 | | /// vC is marked as fixed. |
2132 | | /// vA needs to find a color. |
2133 | | /// None are available. |
2134 | | /// vA cannot evict vC: vC is a fixed virtual register now. |
2135 | | /// vA does as if vB was evicted => vA uses R2. |
2136 | | /// vB needs to find a color. |
2137 | | /// R3 is available. |
2138 | | /// Recoloring => vC = R1, vA = R2, vB = R3 |
2139 | | /// |
2140 | | /// \p Order defines the preferred allocation order for \p VirtReg. |
2141 | | /// \p NewRegs will contain any new virtual register that have been created |
2142 | | /// (split, spill) during the process and that must be assigned. |
2143 | | /// \p FixedRegisters contains all the virtual registers that cannot be |
2144 | | /// recolored. |
2145 | | /// \p Depth gives the current depth of the last chance recoloring. |
2146 | | /// \return a physical register that can be used for VirtReg or ~0u if none |
2147 | | /// exists. |
2148 | | unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, |
2149 | | AllocationOrder &Order, |
2150 | | SmallVectorImpl<unsigned> &NewVRegs, |
2151 | | SmallVirtRegSet &FixedRegisters, |
2152 | 95 | unsigned Depth) { |
2153 | 95 | DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); |
2154 | 95 | // Ranges must be Done. |
2155 | 95 | assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && |
2156 | 95 | "Last chance recoloring should really be last chance"); |
2157 | 95 | // Set the max depth to LastChanceRecoloringMaxDepth. |
2158 | 95 | // We may want to reconsider that if we end up with a too large search space |
2159 | 95 | // for target with hundreds of registers. |
2160 | 95 | // Indeed, in that case we may want to cut the search space earlier. |
2161 | 95 | if (Depth >= LastChanceRecoloringMaxDepth && 95 !ExhaustiveSearch0 ) { |
2162 | 0 | DEBUG(dbgs() << "Abort because max depth has been reached.\n"); |
2163 | 0 | CutOffInfo |= CO_Depth; |
2164 | 0 | return ~0u; |
2165 | 0 | } |
2166 | 95 | |
2167 | 95 | // Set of Live intervals that will need to be recolored. |
2168 | 95 | SmallLISet RecoloringCandidates; |
2169 | 95 | // Record the original mapping virtual register to physical register in case |
2170 | 95 | // the recoloring fails. |
2171 | 95 | DenseMap<unsigned, unsigned> VirtRegToPhysReg; |
2172 | 95 | // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in |
2173 | 95 | // this recoloring "session". |
2174 | 95 | FixedRegisters.insert(VirtReg.reg); |
2175 | 95 | SmallVector<unsigned, 4> CurrentNewVRegs; |
2176 | 95 | |
2177 | 95 | Order.rewind(); |
2178 | 897 | while (unsigned PhysReg897 = Order.next()) { |
2179 | 802 | DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " |
2180 | 802 | << PrintReg(PhysReg, TRI) << '\n'); |
2181 | 802 | RecoloringCandidates.clear(); |
2182 | 802 | VirtRegToPhysReg.clear(); |
2183 | 802 | CurrentNewVRegs.clear(); |
2184 | 802 | |
2185 | 802 | // It is only possible to recolor virtual register interference. |
2186 | 802 | if (Matrix->checkInterference(VirtReg, PhysReg) > |
2187 | 802 | LiveRegMatrix::IK_VirtReg) { |
2188 | 701 | DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); |
2189 | 701 | |
2190 | 701 | continue; |
2191 | 701 | } |
2192 | 101 | |
2193 | 101 | // Early give up on this PhysReg if it is obvious we cannot recolor all |
2194 | 101 | // the interferences. |
2195 | 101 | if (101 !mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, |
2196 | 101 | FixedRegisters)) { |
2197 | 97 | DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); |
2198 | 97 | continue; |
2199 | 97 | } |
2200 | 4 | |
2201 | 4 | // RecoloringCandidates contains all the virtual registers that interfer |
2202 | 4 | // with VirtReg on PhysReg (or one of its aliases). |
2203 | 4 | // Enqueue them for recoloring and perform the actual recoloring. |
2204 | 4 | PQueue RecoloringQueue; |
2205 | 4 | for (SmallLISet::iterator It = RecoloringCandidates.begin(), |
2206 | 4 | EndIt = RecoloringCandidates.end(); |
2207 | 8 | It != EndIt8 ; ++It4 ) { |
2208 | 4 | unsigned ItVirtReg = (*It)->reg; |
2209 | 4 | enqueue(RecoloringQueue, *It); |
2210 | 4 | assert(VRM->hasPhys(ItVirtReg) && |
2211 | 4 | "Interferences are supposed to be with allocated vairables"); |
2212 | 4 | |
2213 | 4 | // Record the current allocation. |
2214 | 4 | VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); |
2215 | 4 | // unset the related struct. |
2216 | 4 | Matrix->unassign(**It); |
2217 | 4 | } |
2218 | 4 | |
2219 | 4 | // Do as if VirtReg was assigned to PhysReg so that the underlying |
2220 | 4 | // recoloring has the right information about the interferes and |
2221 | 4 | // available colors. |
2222 | 4 | Matrix->assign(VirtReg, PhysReg); |
2223 | 4 | |
2224 | 4 | // Save the current recoloring state. |
2225 | 4 | // If we cannot recolor all the interferences, we will have to start again |
2226 | 4 | // at this point for the next physical register. |
2227 | 4 | SmallVirtRegSet SaveFixedRegisters(FixedRegisters); |
2228 | 4 | if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, |
2229 | 4 | FixedRegisters, Depth)) { |
2230 | 0 | // Push the queued vregs into the main queue. |
2231 | 0 | for (unsigned NewVReg : CurrentNewVRegs) |
2232 | 0 | NewVRegs.push_back(NewVReg); |
2233 | 0 | // Do not mess up with the global assignment process. |
2234 | 0 | // I.e., VirtReg must be unassigned. |
2235 | 0 | Matrix->unassign(VirtReg); |
2236 | 0 | return PhysReg; |
2237 | 0 | } |
2238 | 4 | |
2239 | 4 | DEBUG4 (dbgs() << "Fail to assign: " << VirtReg << " to " |
2240 | 4 | << PrintReg(PhysReg, TRI) << '\n'); |
2241 | 4 | |
2242 | 4 | // The recoloring attempt failed, undo the changes. |
2243 | 4 | FixedRegisters = SaveFixedRegisters; |
2244 | 4 | Matrix->unassign(VirtReg); |
2245 | 4 | |
2246 | 4 | // For a newly created vreg which is also in RecoloringCandidates, |
2247 | 4 | // don't add it to NewVRegs because its physical register will be restored |
2248 | 4 | // below. Other vregs in CurrentNewVRegs are created by calling |
2249 | 4 | // selectOrSplit and should be added into NewVRegs. |
2250 | 4 | for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(), |
2251 | 4 | End = CurrentNewVRegs.end(); |
2252 | 6 | Next != End6 ; ++Next2 ) { |
2253 | 2 | if (RecoloringCandidates.count(&LIS->getInterval(*Next))) |
2254 | 2 | continue; |
2255 | 0 | NewVRegs.push_back(*Next); |
2256 | 0 | } |
2257 | 4 | |
2258 | 4 | for (SmallLISet::iterator It = RecoloringCandidates.begin(), |
2259 | 4 | EndIt = RecoloringCandidates.end(); |
2260 | 8 | It != EndIt8 ; ++It4 ) { |
2261 | 4 | unsigned ItVirtReg = (*It)->reg; |
2262 | 4 | if (VRM->hasPhys(ItVirtReg)) |
2263 | 0 | Matrix->unassign(**It); |
2264 | 4 | unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; |
2265 | 4 | Matrix->assign(**It, ItPhysReg); |
2266 | 4 | } |
2267 | 802 | } |
2268 | 95 | |
2269 | 95 | // Last chance recoloring did not worked either, give up. |
2270 | 95 | return ~0u; |
2271 | 95 | } |
2272 | | |
2273 | | /// tryRecoloringCandidates - Try to assign a new color to every register |
2274 | | /// in \p RecoloringQueue.
2275 | | /// \p NewRegs will contain any new virtual register created during the |
2276 | | /// recoloring process. |
2277 | | /// \p FixedRegisters[in/out] contains all the registers that have been |
2278 | | /// recolored. |
2279 | | /// \return true if all virtual registers in RecoloringQueue were successfully |
2280 | | /// recolored, false otherwise. |
2281 | | bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, |
2282 | | SmallVectorImpl<unsigned> &NewVRegs, |
2283 | | SmallVirtRegSet &FixedRegisters, |
2284 | 4 | unsigned Depth) { |
2285 | 4 | while (!RecoloringQueue.empty()4 ) { |
2286 | 4 | LiveInterval *LI = dequeue(RecoloringQueue); |
2287 | 4 | DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); |
2288 | 4 | unsigned PhysReg; |
2289 | 4 | PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); |
2290 | 4 | // When splitting happens, the live-range may actually be empty. |
2291 | 4 | // In that case, this is okay to continue the recoloring even |
2292 | 4 | // if we did not find an alternative color for it. Indeed, |
2293 | 4 | // there will not be anything to color for LI in the end. |
2294 | 4 | if (PhysReg == ~0u || 4 (!PhysReg && 2 !LI->empty()2 )) |
2295 | 4 | return false; |
2296 | 0 |
|
2297 | 0 | if (0 !PhysReg0 ) { |
2298 | 0 | assert(LI->empty() && "Only empty live-range do not require a register"); |
2299 | 0 | DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n"); |
2300 | 0 | continue; |
2301 | 0 | } |
2302 | 0 | DEBUG0 (dbgs() << "Recoloring of " << *LI |
2303 | 0 | << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); |
2304 | 0 |
|
2305 | 0 | Matrix->assign(*LI, PhysReg); |
2306 | 0 | FixedRegisters.insert(LI->reg); |
2307 | 0 | } |
2308 | 0 | return true; |
2309 | 4 | } |
2310 | | |
2311 | | //===----------------------------------------------------------------------===// |
2312 | | // Main Entry Point |
2313 | | //===----------------------------------------------------------------------===// |
2314 | | |
2315 | | unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, |
2316 | 11.1M | SmallVectorImpl<unsigned> &NewVRegs) { |
2317 | 11.1M | CutOffInfo = CO_None; |
2318 | 11.1M | LLVMContext &Ctx = MF->getFunction()->getContext(); |
2319 | 11.1M | SmallVirtRegSet FixedRegisters; |
2320 | 11.1M | unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); |
2321 | 11.1M | if (Reg == ~0U && 11.1M (CutOffInfo != CO_None)93 ) { |
2322 | 0 | uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); |
2323 | 0 | if (CutOffEncountered == CO_Depth) |
2324 | 0 | Ctx.emitError("register allocation failed: maximum depth for recoloring " |
2325 | 0 | "reached. Use -fexhaustive-register-search to skip " |
2326 | 0 | "cutoffs"); |
2327 | 0 | else if (0 CutOffEncountered == CO_Interf0 ) |
2328 | 0 | Ctx.emitError("register allocation failed: maximum interference for " |
2329 | 0 | "recoloring reached. Use -fexhaustive-register-search " |
2330 | 0 | "to skip cutoffs"); |
2331 | 0 | else if (0 CutOffEncountered == (CO_Depth | CO_Interf)0 ) |
2332 | 0 | Ctx.emitError("register allocation failed: maximum interference and " |
2333 | 0 | "depth for recoloring reached. Use " |
2334 | 0 | "-fexhaustive-register-search to skip cutoffs"); |
2335 | 0 | } |
2336 | 11.1M | return Reg; |
2337 | 11.1M | } |
2338 | | |
2339 | | /// Using a CSR for the first time has a cost because it causes push|pop |
2340 | | /// to be added to prologue|epilogue. Splitting a cold section of the live |
2341 | | /// range can have lower cost than using the CSR for the first time; |
2342 | | /// Spilling a live range in the cold path can have lower cost than using |
2343 | | /// the CSR for the first time. Returns the physical register if we decide |
2344 | | /// to use the CSR; otherwise return 0. |
2345 | | unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, |
2346 | | AllocationOrder &Order, |
2347 | | unsigned PhysReg, |
2348 | | unsigned &CostPerUseLimit, |
2349 | 129k | SmallVectorImpl<unsigned> &NewVRegs) { |
2350 | 129k | if (getStage(VirtReg) == RS_Spill && 129k VirtReg.isSpillable()253 ) { |
2351 | 253 | // We choose spill over using the CSR for the first time if the spill cost |
2352 | 253 | // is lower than CSRCost. |
2353 | 253 | SA->analyze(&VirtReg); |
2354 | 253 | if (calcSpillCost() >= CSRCost) |
2355 | 2 | return PhysReg; |
2356 | 251 | |
2357 | 251 | // We are going to spill, set CostPerUseLimit to 1 to make sure that |
2358 | 251 | // we will not use a callee-saved register in tryEvict. |
2359 | 251 | CostPerUseLimit = 1; |
2360 | 251 | return 0; |
2361 | 251 | } |
2362 | 129k | if (129k getStage(VirtReg) < RS_Split129k ) { |
2363 | 129k | // We choose pre-splitting over using the CSR for the first time if |
2364 | 129k | // the cost of splitting is lower than CSRCost. |
2365 | 129k | SA->analyze(&VirtReg); |
2366 | 129k | unsigned NumCands = 0; |
2367 | 129k | BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. |
2368 | 129k | unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, |
2369 | 129k | NumCands, true /*IgnoreCSR*/); |
2370 | 129k | if (BestCand == NoCand) |
2371 | 129k | // Use the CSR if we can't find a region split below CSRCost. |
2372 | 128k | return PhysReg; |
2373 | 682 | |
2374 | 682 | // Perform the actual pre-splitting. |
2375 | 682 | doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); |
2376 | 682 | return 0; |
2377 | 682 | } |
2378 | 52 | return PhysReg; |
2379 | 52 | } |
2380 | | |
2381 | 25.0k | void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { |
2382 | 25.0k | // Do not keep invalid information around. |
2383 | 25.0k | SetOfBrokenHints.remove(&LI); |
2384 | 25.0k | } |
2385 | | |
2386 | 587k | void RAGreedy::initializeCSRCost() { |
2387 | 587k | // We use the larger one out of the command-line option and the value report |
2388 | 587k | // by TRI. |
2389 | 587k | CSRCost = BlockFrequency( |
2390 | 587k | std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); |
2391 | 587k | if (!CSRCost.getFrequency()) |
2392 | 116k | return; |
2393 | 471k | |
2394 | 471k | // Raw cost is relative to Entry == 2^14; scale it appropriately. |
2395 | 471k | uint64_t ActualEntry = MBFI->getEntryFreq(); |
2396 | 471k | if (!ActualEntry471k ) { |
2397 | 0 | CSRCost = 0; |
2398 | 0 | return; |
2399 | 0 | } |
2400 | 471k | uint64_t FixedEntry = 1 << 14; |
2401 | 471k | if (ActualEntry < FixedEntry) |
2402 | 441k | CSRCost *= BranchProbability(ActualEntry, FixedEntry); |
2403 | 29.8k | else if (29.8k ActualEntry <= UINT32_MAX29.8k ) |
2404 | 29.8k | // Invert the fraction and divide. |
2405 | 18.2k | CSRCost /= BranchProbability(FixedEntry, ActualEntry); |
2406 | 29.8k | else |
2407 | 29.8k | // Can't use BranchProbability in general, since it takes 32-bit numbers. |
2408 | 11.5k | CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); |
2409 | 587k | } |
2410 | | |
2411 | | /// \brief Collect the hint info for \p Reg. |
2412 | | /// The results are stored into \p Out. |
2413 | | /// \p Out is not cleared before being populated. |
2414 | 1.31M | void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { |
2415 | 6.43M | for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { |
2416 | 6.43M | if (!Instr.isFullCopy()) |
2417 | 3.05M | continue; |
2418 | 3.38M | // Look for the other end of the copy. |
2419 | 3.38M | unsigned OtherReg = Instr.getOperand(0).getReg(); |
2420 | 3.38M | if (OtherReg == Reg3.38M ) { |
2421 | 948k | OtherReg = Instr.getOperand(1).getReg(); |
2422 | 948k | if (OtherReg == Reg) |
2423 | 0 | continue; |
2424 | 3.38M | } |
2425 | 3.38M | // Get the current assignment. |
2426 | 3.38M | unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) |
2427 | 3.20M | ? OtherReg |
2428 | 172k | : VRM->getPhys(OtherReg); |
2429 | 6.43M | // Push the collected information. |
2430 | 6.43M | Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, |
2431 | 6.43M | OtherPhysReg)); |
2432 | 6.43M | } |
2433 | 1.31M | } |
2434 | | |
2435 | | /// \brief Using the given \p List, compute the cost of the broken hints if |
2436 | | /// \p PhysReg was used. |
2437 | | /// \return The cost of \p List for \p PhysReg. |
2438 | | BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, |
2439 | 9.11k | unsigned PhysReg) { |
2440 | 9.11k | BlockFrequency Cost = 0; |
2441 | 19.7k | for (const HintInfo &Info : List) { |
2442 | 19.7k | if (Info.PhysReg != PhysReg) |
2443 | 12.6k | Cost += Info.Freq; |
2444 | 19.7k | } |
2445 | 9.11k | return Cost; |
2446 | 9.11k | } |
2447 | | |
2448 | | /// \brief Using the register assigned to \p VirtReg, try to recolor |
2449 | | /// all the live ranges that are copy-related with \p VirtReg. |
2450 | | /// The recoloring is then propagated to all the live-ranges that have |
2451 | | /// been recolored and so on, until no more copies can be coalesced or |
2452 | | /// it is not profitable. |
2453 | | /// For a given live range, profitability is determined by the sum of the |
2454 | | /// frequencies of the non-identity copies it would introduce with the old |
2455 | | /// and new register. |
2456 | 1.28M | void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { |
2457 | 1.28M | // We have a broken hint, check if it is possible to fix it by |
2458 | 1.28M | // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted |
2459 | 1.28M | // some register and PhysReg may be available for the other live-ranges. |
2460 | 1.28M | SmallSet<unsigned, 4> Visited; |
2461 | 1.28M | SmallVector<unsigned, 2> RecoloringCandidates; |
2462 | 1.28M | HintsInfo Info; |
2463 | 1.28M | unsigned Reg = VirtReg.reg; |
2464 | 1.28M | unsigned PhysReg = VRM->getPhys(Reg); |
2465 | 1.28M | // Start the recoloring algorithm from the input live-interval, then |
2466 | 1.28M | // it will propagate to the ones that are copy-related with it. |
2467 | 1.28M | Visited.insert(Reg); |
2468 | 1.28M | RecoloringCandidates.push_back(Reg); |
2469 | 1.28M | |
2470 | 1.28M | DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' |
2471 | 1.28M | << PrintReg(PhysReg, TRI) << ")\n"); |
2472 | 1.28M | |
2473 | 2.92M | do { |
2474 | 2.92M | Reg = RecoloringCandidates.pop_back_val(); |
2475 | 2.92M | |
2476 | 2.92M | // We cannot recolor physical register. |
2477 | 2.92M | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
2478 | 1.52M | continue; |
2479 | 1.39M | |
2480 | 2.92M | assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); |
2481 | 1.39M | |
2482 | 1.39M | // Get the live interval mapped with this virtual register to be able |
2483 | 1.39M | // to check for the interference with the new color. |
2484 | 1.39M | LiveInterval &LI = LIS->getInterval(Reg); |
2485 | 1.39M | unsigned CurrPhys = VRM->getPhys(Reg); |
2486 | 1.39M | // Check that the new color matches the register class constraints and |
2487 | 1.39M | // that it is free for this live range. |
2488 | 1.39M | if (CurrPhys != PhysReg && 1.39M (!MRI->getRegClass(Reg)->contains(PhysReg) || |
2489 | 85.2k | Matrix->checkInterference(LI, PhysReg))) |
2490 | 80.9k | continue; |
2491 | 1.31M | |
2492 | 1.31M | DEBUG1.31M (dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) |
2493 | 1.31M | << ") is recolorable.\n"); |
2494 | 1.31M | |
2495 | 1.31M | // Gather the hint info. |
2496 | 1.31M | Info.clear(); |
2497 | 1.31M | collectHintInfo(Reg, Info); |
2498 | 1.31M | // Check if recoloring the live-range will increase the cost of the |
2499 | 1.31M | // non-identity copies. |
2500 | 1.31M | if (CurrPhys != PhysReg1.31M ) { |
2501 | 4.55k | DEBUG(dbgs() << "Checking profitability:\n"); |
2502 | 4.55k | BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); |
2503 | 4.55k | BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); |
2504 | 4.55k | DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() |
2505 | 4.55k | << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); |
2506 | 4.55k | if (OldCopiesCost < NewCopiesCost4.55k ) { |
2507 | 162 | DEBUG(dbgs() << "=> Not profitable.\n"); |
2508 | 162 | continue; |
2509 | 162 | } |
2510 | 4.39k | // At this point, the cost is either cheaper or equal. If it is |
2511 | 4.39k | // equal, we consider this is profitable because it may expose |
2512 | 4.39k | // more recoloring opportunities. |
2513 | 4.39k | DEBUG4.39k (dbgs() << "=> Profitable.\n"); |
2514 | 4.39k | // Recolor the live-range. |
2515 | 4.39k | Matrix->unassign(LI); |
2516 | 4.39k | Matrix->assign(LI, PhysReg); |
2517 | 4.39k | } |
2518 | 1.31M | // Push all copy-related live-ranges to keep reconciling the broken |
2519 | 1.31M | // hints. |
2520 | 1.31M | for (const HintInfo &HI : Info) 1.31M { |
2521 | 3.38M | if (Visited.insert(HI.Reg).second) |
2522 | 1.63M | RecoloringCandidates.push_back(HI.Reg); |
2523 | 3.38M | } |
2524 | 2.92M | } while (!RecoloringCandidates.empty()); |
2525 | 1.28M | } |
2526 | | |
2527 | | /// \brief Try to recolor broken hints. |
2528 | | /// Broken hints may be repaired by recoloring when an evicted variable |
2529 | | /// freed up a register for a larger live-range. |
2530 | | /// Consider the following example: |
2531 | | /// BB1: |
2532 | | /// a = |
2533 | | /// b = |
2534 | | /// BB2: |
2535 | | /// ... |
2536 | | /// = b |
2537 | | /// = a |
2538 | | /// Let us assume b gets split: |
2539 | | /// BB1: |
2540 | | /// a = |
2541 | | /// b = |
2542 | | /// BB2: |
2543 | | /// c = b |
2544 | | /// ... |
2545 | | /// d = c |
2546 | | /// = d |
2547 | | /// = a |
2548 | | /// Because of how the allocation work, b, c, and d may be assigned different |
2549 | | /// colors. Now, if a gets evicted later: |
2550 | | /// BB1: |
2551 | | /// a = |
2552 | | /// st a, SpillSlot |
2553 | | /// b = |
2554 | | /// BB2: |
2555 | | /// c = b |
2556 | | /// ... |
2557 | | /// d = c |
2558 | | /// = d |
2559 | | /// e = ld SpillSlot |
2560 | | /// = e |
2561 | | /// This is likely that we can assign the same register for b, c, and d, |
2562 | | /// getting rid of 2 copies. |
2563 | 587k | void RAGreedy::tryHintsRecoloring() { |
2564 | 1.43M | for (LiveInterval *LI : SetOfBrokenHints) { |
2565 | 1.43M | assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && |
2566 | 1.43M | "Recoloring is possible only for virtual registers"); |
2567 | 1.43M | // Some dead defs may be around (e.g., because of debug uses). |
2568 | 1.43M | // Ignore those. |
2569 | 1.43M | if (!VRM->hasPhys(LI->reg)) |
2570 | 148k | continue; |
2571 | 1.28M | tryHintRecoloring(*LI); |
2572 | 1.28M | } |
2573 | 587k | } |
2574 | | |
2575 | | unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, |
2576 | | SmallVectorImpl<unsigned> &NewVRegs, |
2577 | | SmallVirtRegSet &FixedRegisters, |
2578 | 11.1M | unsigned Depth) { |
2579 | 11.1M | unsigned CostPerUseLimit = ~0u; |
2580 | 11.1M | // First try assigning a free register. |
2581 | 11.1M | AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); |
2582 | 11.1M | if (unsigned PhysReg11.1M = tryAssign(VirtReg, Order, NewVRegs)) { |
2583 | 9.92M | // When NewVRegs is not empty, we may have made decisions such as evicting |
2584 | 9.92M | // a virtual register, go with the earlier decisions and use the physical |
2585 | 9.92M | // register. |
2586 | 9.92M | if (CSRCost.getFrequency() && 9.92M isUnusedCalleeSavedReg(PhysReg)3.14M && |
2587 | 9.92M | NewVRegs.empty()129k ) { |
2588 | 129k | unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, |
2589 | 129k | CostPerUseLimit, NewVRegs); |
2590 | 129k | if (CSRReg || 129k !NewVRegs.empty()933 ) |
2591 | 129k | // Return now if we decide to use a CSR or create new vregs due to |
2592 | 129k | // pre-splitting. |
2593 | 129k | return CSRReg; |
2594 | 9.92M | } else |
2595 | 9.79M | return PhysReg; |
2596 | 1.27M | } |
2597 | 1.27M | |
2598 | 1.27M | LiveRangeStage Stage = getStage(VirtReg); |
2599 | 1.27M | DEBUG(dbgs() << StageName[Stage] |
2600 | 1.27M | << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); |
2601 | 1.27M | |
2602 | 1.27M | // Try to evict a less worthy live range, but only for ranges from the primary |
2603 | 1.27M | // queue. The RS_Split ranges already failed to do this, and they should not |
2604 | 1.27M | // get a second chance until they have been split. |
2605 | 1.27M | if (Stage != RS_Split) |
2606 | 958k | if (unsigned 958k PhysReg958k = |
2607 | 547k | tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { |
2608 | 547k | unsigned Hint = MRI->getSimpleHint(VirtReg.reg); |
2609 | 547k | // If VirtReg has a hint and that hint is broken record this |
2610 | 547k | // virtual register as a recoloring candidate for broken hint. |
2611 | 547k | // Indeed, since we evicted a variable in its neighborhood it is |
2612 | 547k | // likely we can at least partially recolor some of the |
2613 | 547k | // copy-related live-ranges. |
2614 | 547k | if (Hint && 547k Hint != PhysReg220k ) |
2615 | 220k | SetOfBrokenHints.insert(&VirtReg); |
2616 | 958k | return PhysReg; |
2617 | 958k | } |
2618 | 723k | |
2619 | 1.27M | assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs"); |
2620 | 723k | |
2621 | 723k | // The first time we see a live range, don't try to split or spill. |
2622 | 723k | // Wait until the second time, when all smaller ranges have been allocated. |
2623 | 723k | // This gives a better picture of the interference to split around. |
2624 | 723k | if (Stage < RS_Split723k ) { |
2625 | 314k | setStage(VirtReg, RS_Split); |
2626 | 314k | DEBUG(dbgs() << "wait for second round\n"); |
2627 | 314k | NewVRegs.push_back(VirtReg.reg); |
2628 | 314k | return 0; |
2629 | 314k | } |
2630 | 408k | |
2631 | 408k | if (408k Stage < RS_Spill408k ) { |
2632 | 314k | // Try splitting VirtReg or interferences. |
2633 | 314k | unsigned NewVRegSizeBefore = NewVRegs.size(); |
2634 | 314k | unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); |
2635 | 314k | if (PhysReg || 314k (NewVRegs.size() - NewVRegSizeBefore)314k ) |
2636 | 127k | return PhysReg; |
2637 | 281k | } |
2638 | 281k | |
2639 | 281k | // If we couldn't allocate a register from spilling, there is probably some |
2640 | 281k | // invalid inline assembly. The base class will report it. |
2641 | 281k | if (281k Stage >= RS_Done || 281k !VirtReg.isSpillable()281k ) |
2642 | 95 | return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, |
2643 | 95 | Depth); |
2644 | 281k | |
2645 | 281k | // Finally spill VirtReg itself. |
2646 | 281k | if (281k EnableDeferredSpilling && 281k getStage(VirtReg) < RS_Memory0 ) { |
2647 | 0 | // TODO: This is experimental and in particular, we do not model |
2648 | 0 | // the live range splitting done by spilling correctly. |
2649 | 0 | // We would need a deep integration with the spiller to do the |
2650 | 0 | // right thing here. Anyway, that is still good for early testing. |
2651 | 0 | setStage(VirtReg, RS_Memory); |
2652 | 0 | DEBUG(dbgs() << "Do as if this register is in memory\n"); |
2653 | 0 | NewVRegs.push_back(VirtReg.reg); |
2654 | 281k | } else { |
2655 | 281k | NamedRegionTimer T("spill", "Spiller", TimerGroupName, |
2656 | 281k | TimerGroupDescription, TimePassesIsEnabled); |
2657 | 281k | LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); |
2658 | 281k | spiller().spill(LRE); |
2659 | 281k | setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); |
2660 | 281k | |
2661 | 281k | if (VerifyEnabled) |
2662 | 32 | MF->verify(this, "After spilling"); |
2663 | 281k | } |
2664 | 11.1M | |
2665 | 11.1M | // The live virtual register requesting allocation was spilled, so tell |
2666 | 11.1M | // the caller not to allocate anything during this round. |
2667 | 11.1M | return 0; |
2668 | 11.1M | } |
2669 | | |
2670 | | void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, |
2671 | | unsigned &FoldedReloads, |
2672 | | unsigned &Spills, |
2673 | 364k | unsigned &FoldedSpills) { |
2674 | 364k | Reloads = 0; |
2675 | 364k | FoldedReloads = 0; |
2676 | 364k | Spills = 0; |
2677 | 364k | FoldedSpills = 0; |
2678 | 364k | |
2679 | 364k | // Sum up the spill and reloads in subloops. |
2680 | 108k | for (MachineLoop *SubLoop : *L) { |
2681 | 108k | unsigned SubReloads; |
2682 | 108k | unsigned SubFoldedReloads; |
2683 | 108k | unsigned SubSpills; |
2684 | 108k | unsigned SubFoldedSpills; |
2685 | 108k | |
2686 | 108k | reportNumberOfSplillsReloads(SubLoop, SubReloads, SubFoldedReloads, |
2687 | 108k | SubSpills, SubFoldedSpills); |
2688 | 108k | Reloads += SubReloads; |
2689 | 108k | FoldedReloads += SubFoldedReloads; |
2690 | 108k | Spills += SubSpills; |
2691 | 108k | FoldedSpills += SubFoldedSpills; |
2692 | 108k | } |
2693 | 364k | |
2694 | 364k | const MachineFrameInfo &MFI = MF->getFrameInfo(); |
2695 | 364k | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); |
2696 | 364k | int FI; |
2697 | 364k | |
2698 | 364k | for (MachineBasicBlock *MBB : L->getBlocks()) |
2699 | 364k | // Handle blocks that were not included in subloops. |
2700 | 1.65M | if (1.65M Loops->getLoopFor(MBB) == L1.65M ) |
2701 | 1.19M | for (MachineInstr &MI : *MBB) 1.19M { |
2702 | 8.36M | const MachineMemOperand *MMO; |
2703 | 8.36M | |
2704 | 8.36M | if (TII->isLoadFromStackSlot(MI, FI) && 8.36M MFI.isSpillSlotObjectIndex(FI)132k ) |
2705 | 114k | ++Reloads; |
2706 | 8.24M | else if (8.24M TII->hasLoadFromStackSlot(MI, MMO, FI) && |
2707 | 3.19k | MFI.isSpillSlotObjectIndex(FI)) |
2708 | 823 | ++FoldedReloads; |
2709 | 8.24M | else if (8.24M TII->isStoreToStackSlot(MI, FI) && |
2710 | 41.7k | MFI.isSpillSlotObjectIndex(FI)) |
2711 | 32.0k | ++Spills; |
2712 | 8.21M | else if (8.21M TII->hasStoreToStackSlot(MI, MMO, FI) && |
2713 | 1.51k | MFI.isSpillSlotObjectIndex(FI)) |
2714 | 78 | ++FoldedSpills; |
2715 | 1.65M | } |
2716 | 364k | |
2717 | 364k | if (Reloads || 364k FoldedReloads332k || Spills332k || FoldedSpills332k ) { |
2718 | 31.8k | using namespace ore; |
2719 | 31.8k | |
2720 | 31.8k | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", |
2721 | 31.8k | L->getStartLoc(), L->getHeader()); |
2722 | 31.8k | if (Spills) |
2723 | 11.9k | R << NV("NumSpills", Spills) << " spills "; |
2724 | 31.8k | if (FoldedSpills) |
2725 | 63 | R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; |
2726 | 31.8k | if (Reloads) |
2727 | 31.6k | R << NV("NumReloads", Reloads) << " reloads "; |
2728 | 31.8k | if (FoldedReloads) |
2729 | 510 | R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; |
2730 | 31.8k | ORE->emit(R << "generated in loop"); |
2731 | 31.8k | } |
2732 | 364k | } |
2733 | | |
2734 | 587k | bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { |
2735 | 587k | DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" |
2736 | 587k | << "********** Function: " << mf.getName() << '\n'); |
2737 | 587k | |
2738 | 587k | MF = &mf; |
2739 | 587k | TRI = MF->getSubtarget().getRegisterInfo(); |
2740 | 587k | TII = MF->getSubtarget().getInstrInfo(); |
2741 | 587k | RCI.runOnMachineFunction(mf); |
2742 | 587k | |
2743 | 587k | EnableLocalReassign = EnableLocalReassignment || |
2744 | 587k | MF->getSubtarget().enableRALocalReassignment( |
2745 | 587k | MF->getTarget().getOptLevel()); |
2746 | 587k | |
2747 | 587k | if (VerifyEnabled) |
2748 | 5 | MF->verify(this, "Before greedy register allocator"); |
2749 | 587k | |
2750 | 587k | RegAllocBase::init(getAnalysis<VirtRegMap>(), |
2751 | 587k | getAnalysis<LiveIntervals>(), |
2752 | 587k | getAnalysis<LiveRegMatrix>()); |
2753 | 587k | Indexes = &getAnalysis<SlotIndexes>(); |
2754 | 587k | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); |
2755 | 587k | DomTree = &getAnalysis<MachineDominatorTree>(); |
2756 | 587k | ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); |
2757 | 587k | SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); |
2758 | 587k | Loops = &getAnalysis<MachineLoopInfo>(); |
2759 | 587k | Bundles = &getAnalysis<EdgeBundles>(); |
2760 | 587k | SpillPlacer = &getAnalysis<SpillPlacement>(); |
2761 | 587k | DebugVars = &getAnalysis<LiveDebugVariables>(); |
2762 | 587k | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
2763 | 587k | |
2764 | 587k | initializeCSRCost(); |
2765 | 587k | |
2766 | 587k | calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); |
2767 | 587k | |
2768 | 587k | DEBUG(LIS->dump()); |
2769 | 587k | |
2770 | 587k | SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); |
2771 | 587k | SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); |
2772 | 587k | ExtraRegInfo.clear(); |
2773 | 587k | ExtraRegInfo.resize(MRI->getNumVirtRegs()); |
2774 | 587k | NextCascade = 1; |
2775 | 587k | IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); |
2776 | 587k | GlobalCand.resize(32); // This will grow as needed. |
2777 | 587k | SetOfBrokenHints.clear(); |
2778 | 587k | |
2779 | 587k | allocatePhysRegs(); |
2780 | 587k | tryHintsRecoloring(); |
2781 | 587k | postOptimization(); |
2782 | 587k | reportNumberOfSplillsReloads(); |
2783 | 587k | |
2784 | 587k | releaseMemory(); |
2785 | 587k | return true; |
2786 | 587k | } |