/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/RegisterCoalescer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the generic RegisterCoalescer interface which |
11 | | // is used as the common interface used by all clients and |
12 | | // implementations of register coalescing. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "RegisterCoalescer.h" |
17 | | #include "llvm/ADT/ArrayRef.h" |
18 | | #include "llvm/ADT/BitVector.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SmallPtrSet.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/ADT/Statistic.h" |
23 | | #include "llvm/Analysis/AliasAnalysis.h" |
24 | | #include "llvm/CodeGen/LiveInterval.h" |
25 | | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
26 | | #include "llvm/CodeGen/LiveRangeEdit.h" |
27 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
28 | | #include "llvm/CodeGen/MachineFunction.h" |
29 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | | #include "llvm/CodeGen/MachineInstr.h" |
31 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
32 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
33 | | #include "llvm/CodeGen/MachineOperand.h" |
34 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
35 | | #include "llvm/CodeGen/Passes.h" |
36 | | #include "llvm/CodeGen/RegisterClassInfo.h" |
37 | | #include "llvm/CodeGen/SlotIndexes.h" |
38 | | #include "llvm/IR/DebugLoc.h" |
39 | | #include "llvm/Pass.h" |
40 | | #include "llvm/MC/LaneBitmask.h" |
41 | | #include "llvm/MC/MCInstrDesc.h" |
42 | | #include "llvm/MC/MCRegisterInfo.h" |
43 | | #include "llvm/Support/CommandLine.h" |
44 | | #include "llvm/Support/Compiler.h" |
45 | | #include "llvm/Support/Debug.h" |
46 | | #include "llvm/Support/ErrorHandling.h" |
47 | | #include "llvm/Support/raw_ostream.h" |
48 | | #include "llvm/Target/TargetInstrInfo.h" |
49 | | #include "llvm/Target/TargetOpcodes.h" |
50 | | #include "llvm/Target/TargetRegisterInfo.h" |
51 | | #include "llvm/Target/TargetSubtargetInfo.h" |
52 | | #include <algorithm> |
53 | | #include <cassert> |
54 | | #include <iterator> |
55 | | #include <limits> |
56 | | #include <tuple> |
57 | | #include <utility> |
58 | | #include <vector> |
59 | | |
60 | | using namespace llvm; |
61 | | |
62 | | #define DEBUG_TYPE "regalloc" |
63 | | |
64 | | STATISTIC(numJoins , "Number of interval joins performed"); |
65 | | STATISTIC(numCrossRCs , "Number of cross class joins performed"); |
66 | | STATISTIC(numCommutes , "Number of instruction commuting performed"); |
67 | | STATISTIC(numExtends , "Number of copies extended"); |
68 | | STATISTIC(NumReMats , "Number of instructions re-materialized"); |
69 | | STATISTIC(NumInflated , "Number of register classes inflated"); |
70 | | STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); |
71 | | STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); |
72 | | |
73 | | static cl::opt<bool> |
74 | | EnableJoining("join-liveintervals", |
75 | | cl::desc("Coalesce copies (default=true)"), |
76 | | cl::init(true)); |
77 | | |
78 | | static cl::opt<bool> UseTerminalRule("terminal-rule", |
79 | | cl::desc("Apply the terminal rule"), |
80 | | cl::init(false), cl::Hidden); |
81 | | |
82 | | /// Temporary flag to test critical edge unsplitting. |
83 | | static cl::opt<bool> |
84 | | EnableJoinSplits("join-splitedges", |
85 | | cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); |
86 | | |
87 | | /// Temporary flag to test global copy optimization. |
88 | | static cl::opt<cl::boolOrDefault> |
89 | | EnableGlobalCopies("join-globalcopies", |
90 | | cl::desc("Coalesce copies that span blocks (default=subtarget)"), |
91 | | cl::init(cl::BOU_UNSET), cl::Hidden); |
92 | | |
93 | | static cl::opt<bool> |
94 | | VerifyCoalescing("verify-coalescing", |
95 | | cl::desc("Verify machine instrs before and after register coalescing"), |
96 | | cl::Hidden); |
97 | | |
98 | | namespace { |
99 | | |
100 | | class RegisterCoalescer : public MachineFunctionPass, |
101 | | private LiveRangeEdit::Delegate { |
102 | | MachineFunction* MF; |
103 | | MachineRegisterInfo* MRI; |
104 | | const TargetRegisterInfo* TRI; |
105 | | const TargetInstrInfo* TII; |
106 | | LiveIntervals *LIS; |
107 | | const MachineLoopInfo* Loops; |
108 | | AliasAnalysis *AA; |
109 | | RegisterClassInfo RegClassInfo; |
110 | | |
111 | | /// A LaneMask to remember on which subregister live ranges we need to call |
112 | | /// shrinkToUses() later. |
113 | | LaneBitmask ShrinkMask; |
114 | | |
115 | | /// True if the main range of the currently coalesced intervals should be |
116 | | /// checked for smaller live intervals. |
117 | | bool ShrinkMainRange; |
118 | | |
119 | | /// \brief True if the coalescer should aggressively coalesce global copies |
120 | | /// in favor of keeping local copies. |
121 | | bool JoinGlobalCopies; |
122 | | |
123 | | /// \brief True if the coalescer should aggressively coalesce fall-thru |
124 | | /// blocks exclusively containing copies. |
125 | | bool JoinSplitEdges; |
126 | | |
127 | | /// Copy instructions yet to be coalesced. |
128 | | SmallVector<MachineInstr*, 8> WorkList; |
129 | | SmallVector<MachineInstr*, 8> LocalWorkList; |
130 | | |
131 | | /// Set of instruction pointers that have been erased, and |
132 | | /// that may be present in WorkList. |
133 | | SmallPtrSet<MachineInstr*, 8> ErasedInstrs; |
134 | | |
135 | | /// Dead instructions that are about to be deleted. |
136 | | SmallVector<MachineInstr*, 8> DeadDefs; |
137 | | |
138 | | /// Virtual registers to be considered for register class inflation. |
139 | | SmallVector<unsigned, 8> InflateRegs; |
140 | | |
141 | | /// Recursively eliminate dead defs in DeadDefs. |
142 | | void eliminateDeadDefs(); |
143 | | |
144 | | /// LiveRangeEdit callback for eliminateDeadDefs(). |
145 | | void LRE_WillEraseInstruction(MachineInstr *MI) override; |
146 | | |
147 | | /// Coalesce the LocalWorkList. |
148 | | void coalesceLocals(); |
149 | | |
150 | | /// Join compatible live intervals |
151 | | void joinAllIntervals(); |
152 | | |
153 | | /// Coalesce copies in the specified MBB, putting |
154 | | /// copies that cannot yet be coalesced into WorkList. |
155 | | void copyCoalesceInMBB(MachineBasicBlock *MBB); |
156 | | |
157 | | /// Tries to coalesce all copies in CurrList. Returns true if any progress |
158 | | /// was made. |
159 | | bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); |
160 | | |
161 | | /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the |
162 | | /// src/dst of the copy instruction CopyMI. This returns true if the copy |
163 | | /// was successfully coalesced away. If it is not currently possible to |
164 | | /// coalesce this interval, but it may be possible if other things get |
165 | | /// coalesced, then it returns true by reference in 'Again'. |
166 | | bool joinCopy(MachineInstr *TheCopy, bool &Again); |
167 | | |
168 | | /// Attempt to join these two intervals. On failure, this |
169 | | /// returns false. The output "SrcInt" will not have been modified, so we |
170 | | /// can use this information below to update aliases. |
171 | | bool joinIntervals(CoalescerPair &CP); |
172 | | |
173 | | /// Attempt joining two virtual registers. Return true on success. |
174 | | bool joinVirtRegs(CoalescerPair &CP); |
175 | | |
176 | | /// Attempt joining with a reserved physreg. |
177 | | bool joinReservedPhysReg(CoalescerPair &CP); |
178 | | |
179 | | /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. |
180 | | /// Subranges in @p LI which only partially interfere with the desired |
181 | | /// LaneMask are split as necessary. @p LaneMask are the lanes that |
182 | | /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange |
183 | | /// lanemasks already adjusted to the coalesced register. |
184 | | void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, |
185 | | LaneBitmask LaneMask, CoalescerPair &CP); |
186 | | |
187 | | /// Join the liveranges of two subregisters. Joins @p RRange into |
188 | | /// @p LRange, @p RRange may be invalid afterwards. |
189 | | void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, |
190 | | LaneBitmask LaneMask, const CoalescerPair &CP); |
191 | | |
192 | | /// We found a non-trivially-coalescable copy. If the source value number is |
193 | | /// defined by a copy from the destination reg see if we can merge these two |
194 | | /// destination reg valno# into a single value number, eliminating a copy. |
195 | | /// This returns true if an interval was modified. |
196 | | bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); |
197 | | |
198 | | /// Return true if there are definitions of IntB |
199 | | /// other than BValNo val# that can reach uses of AValno val# of IntA. |
200 | | bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, |
201 | | VNInfo *AValNo, VNInfo *BValNo); |
202 | | |
203 | | /// We found a non-trivially-coalescable copy. |
204 | | /// If the source value number is defined by a commutable instruction and |
205 | | /// its other operand is coalesced to the copy dest register, see if we |
206 | | /// can transform the copy into a noop by commuting the definition. |
207 | | /// This returns true if an interval was modified. |
208 | | bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); |
209 | | |
210 | | /// We found a copy which can be moved to its less frequent predecessor. |
211 | | bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); |
212 | | |
213 | | /// If the source of a copy is defined by a |
214 | | /// trivial computation, replace the copy by rematerialize the definition. |
215 | | bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, |
216 | | bool &IsDefCopy); |
217 | | |
218 | | /// Return true if a copy involving a physreg should be joined. |
219 | | bool canJoinPhys(const CoalescerPair &CP); |
220 | | |
221 | | /// Replace all defs and uses of SrcReg to DstReg and update the subregister |
222 | | /// number if it is not zero. If DstReg is a physical register and the |
223 | | /// existing subregister number of the def / use being updated is not zero, |
224 | | /// make sure to set it to the correct physical subregister. |
225 | | void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); |
226 | | |
227 | | /// If the given machine operand reads only undefined lanes add an undef |
228 | | /// flag. |
229 | | /// This can happen when undef uses were previously concealed by a copy |
230 | | /// which we coalesced. Example: |
231 | | /// %vreg0:sub0<def,read-undef> = ... |
232 | | /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef |
233 | | /// = use %vreg1:sub1 <-- hidden undef use |
234 | | void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, |
235 | | MachineOperand &MO, unsigned SubRegIdx); |
236 | | |
237 | | /// Handle copies of undef values. |
238 | | /// Returns true if @p CopyMI was a copy of an undef value and eliminated. |
239 | | bool eliminateUndefCopy(MachineInstr *CopyMI); |
240 | | |
241 | | /// Check whether or not we should apply the terminal rule on the |
242 | | /// destination (Dst) of \p Copy. |
243 | | /// When the terminal rule applies, Copy is not profitable to |
244 | | /// coalesce. |
245 | | /// Dst is terminal if it has exactly one affinity (Dst, Src) and |
246 | | /// at least one interference (Dst, Dst2). If Dst is terminal, the |
247 | | /// terminal rule consists in checking that at least one of |
248 | | /// interfering node, say Dst2, has an affinity of equal or greater |
249 | | /// weight with Src. |
250 | | /// In that case, Dst2 and Dst will not be able to be both coalesced |
251 | | /// with Src. Since Dst2 exposes more coalescing opportunities than |
252 | | /// Dst, we can drop \p Copy. |
253 | | bool applyTerminalRule(const MachineInstr &Copy) const; |
254 | | |
255 | | /// Wrapper method for \see LiveIntervals::shrinkToUses. |
256 | | /// This method does the proper fixing of the live-ranges when the afore |
257 | | /// mentioned method returns true. |
258 | | void shrinkToUses(LiveInterval *LI, |
259 | 732k | SmallVectorImpl<MachineInstr * > *Dead = nullptr) { |
260 | 732k | if (LIS->shrinkToUses(LI, Dead)732k ) { |
261 | 112 | /// Check whether or not \p LI is composed by multiple connected |
262 | 112 | /// components and if that is the case, fix that. |
263 | 112 | SmallVector<LiveInterval*, 8> SplitLIs; |
264 | 112 | LIS->splitSeparateComponents(*LI, SplitLIs); |
265 | 112 | } |
266 | 732k | } |
267 | | |
268 | | /// Wrapper Method to do all the necessary work when an Instruction is |
269 | | /// deleted. |
270 | | /// Optimizations should use this to make sure that deleted instructions |
271 | | /// are always accounted for. |
272 | 1.03M | void deleteInstr(MachineInstr* MI) { |
273 | 1.03M | ErasedInstrs.insert(MI); |
274 | 1.03M | LIS->RemoveMachineInstrFromMaps(*MI); |
275 | 1.03M | MI->eraseFromParent(); |
276 | 1.03M | } |
277 | | |
278 | | public: |
279 | | static char ID; ///< Class identification, replacement for typeinfo |
280 | | |
281 | 32.0k | RegisterCoalescer() : MachineFunctionPass(ID) { |
282 | 32.0k | initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); |
283 | 32.0k | } |
284 | | |
285 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
286 | | |
287 | | void releaseMemory() override; |
288 | | |
289 | | /// This is the pass entry point. |
290 | | bool runOnMachineFunction(MachineFunction&) override; |
291 | | |
292 | | /// Implement the dump method. |
293 | | void print(raw_ostream &O, const Module* = nullptr) const override; |
294 | | }; |
295 | | |
296 | | } // end anonymous namespace |
297 | | |
298 | | char RegisterCoalescer::ID = 0; |
299 | | |
300 | | char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; |
301 | | |
302 | 36.7k | INITIALIZE_PASS_BEGIN36.7k (RegisterCoalescer, "simple-register-coalescing",
|
303 | 36.7k | "Simple Register Coalescing", false, false) |
304 | 36.7k | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) |
305 | 36.7k | INITIALIZE_PASS_DEPENDENCY(SlotIndexes) |
306 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) |
307 | 36.7k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
308 | 36.7k | INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", |
309 | | "Simple Register Coalescing", false, false) |
310 | | |
311 | | static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, |
312 | | unsigned &Src, unsigned &Dst, |
313 | 35.1M | unsigned &SrcSub, unsigned &DstSub) { |
314 | 35.1M | if (MI->isCopy()35.1M ) { |
315 | 30.5M | Dst = MI->getOperand(0).getReg(); |
316 | 30.5M | DstSub = MI->getOperand(0).getSubReg(); |
317 | 30.5M | Src = MI->getOperand(1).getReg(); |
318 | 30.5M | SrcSub = MI->getOperand(1).getSubReg(); |
319 | 35.1M | } else if (4.53M MI->isSubregToReg()4.53M ) { |
320 | 1.70M | Dst = MI->getOperand(0).getReg(); |
321 | 1.70M | DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), |
322 | 1.70M | MI->getOperand(3).getImm()); |
323 | 1.70M | Src = MI->getOperand(2).getReg(); |
324 | 1.70M | SrcSub = MI->getOperand(2).getSubReg(); |
325 | 1.70M | } else |
326 | 2.82M | return false; |
327 | 32.2M | return true; |
328 | 32.2M | } |
329 | | |
330 | | /// Return true if this block should be vacated by the coalescer to eliminate |
331 | | /// branches. The important cases to handle in the coalescer are critical edges |
332 | | /// split during phi elimination which contain only copies. Simple blocks that |
333 | | /// contain non-branches should also be vacated, but this can be handled by an |
334 | | /// earlier pass similar to early if-conversion. |
335 | 0 | static bool isSplitEdge(const MachineBasicBlock *MBB) { |
336 | 0 | if (MBB->pred_size() != 1 || 0 MBB->succ_size() != 10 ) |
337 | 0 | return false; |
338 | 0 |
|
339 | 0 | for (const auto &MI : *MBB) 0 { |
340 | 0 | if (!MI.isCopyLike() && 0 !MI.isUnconditionalBranch()0 ) |
341 | 0 | return false; |
342 | 0 | } |
343 | 0 | return true; |
344 | 0 | } |
345 | | |
346 | 18.5M | bool CoalescerPair::setRegisters(const MachineInstr *MI) { |
347 | 18.5M | SrcReg = DstReg = 0; |
348 | 18.5M | SrcIdx = DstIdx = 0; |
349 | 18.5M | NewRC = nullptr; |
350 | 18.5M | Flipped = CrossClass = false; |
351 | 18.5M | |
352 | 18.5M | unsigned Src, Dst, SrcSub, DstSub; |
353 | 18.5M | if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) |
354 | 137 | return false; |
355 | 18.5M | Partial = SrcSub || 18.5M DstSub17.9M ; |
356 | 18.5M | |
357 | 18.5M | // If one register is a physreg, it must be Dst. |
358 | 18.5M | if (TargetRegisterInfo::isPhysicalRegister(Src)18.5M ) { |
359 | 3.97M | if (TargetRegisterInfo::isPhysicalRegister(Dst)) |
360 | 294k | return false; |
361 | 3.67M | std::swap(Src, Dst); |
362 | 3.67M | std::swap(SrcSub, DstSub); |
363 | 3.67M | Flipped = true; |
364 | 3.67M | } |
365 | 18.5M | |
366 | 18.2M | const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); |
367 | 18.2M | |
368 | 18.2M | if (TargetRegisterInfo::isPhysicalRegister(Dst)18.2M ) { |
369 | 11.8M | // Eliminate DstSub on a physreg. |
370 | 11.8M | if (DstSub11.8M ) { |
371 | 2 | Dst = TRI.getSubReg(Dst, DstSub); |
372 | 2 | if (!Dst2 ) return false0 ; |
373 | 2 | DstSub = 0; |
374 | 2 | } |
375 | 11.8M | |
376 | 11.8M | // Eliminate SrcSub by picking a corresponding Dst superregister. |
377 | 11.8M | if (11.8M SrcSub11.8M ) { |
378 | 85.5k | Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); |
379 | 85.5k | if (!Dst85.5k ) return false11.8k ; |
380 | 11.8M | } else if (11.8M !MRI.getRegClass(Src)->contains(Dst)11.8M ) { |
381 | 138k | return false; |
382 | 138k | } |
383 | 6.33M | } else { |
384 | 6.33M | // Both registers are virtual. |
385 | 6.33M | const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); |
386 | 6.33M | const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); |
387 | 6.33M | |
388 | 6.33M | // Both registers have subreg indices. |
389 | 6.33M | if (SrcSub && 6.33M DstSub554k ) { |
390 | 107k | // Copies between different sub-registers are never coalescable. |
391 | 107k | if (Src == Dst && 107k SrcSub != DstSub3.11k ) |
392 | 3.11k | return false; |
393 | 104k | |
394 | 104k | NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, |
395 | 104k | SrcIdx, DstIdx); |
396 | 104k | if (!NewRC) |
397 | 538 | return false; |
398 | 6.22M | } else if (6.22M DstSub6.22M ) { |
399 | 805k | // SrcReg will be merged with a sub-register of DstReg. |
400 | 805k | SrcIdx = DstSub; |
401 | 805k | NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); |
402 | 6.22M | } else if (5.42M SrcSub5.42M ) { |
403 | 447k | // DstReg will be merged with a sub-register of SrcReg. |
404 | 447k | DstIdx = SrcSub; |
405 | 447k | NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); |
406 | 5.42M | } else { |
407 | 4.97M | // This is a straight copy without sub-registers. |
408 | 4.97M | NewRC = TRI.getCommonSubClass(DstRC, SrcRC); |
409 | 4.97M | } |
410 | 6.33M | |
411 | 6.33M | // The combined constraint may be impossible to satisfy. |
412 | 6.33M | if (6.33M !NewRC6.33M ) |
413 | 76.2k | return false; |
414 | 6.25M | |
415 | 6.25M | // Prefer SrcReg to be a sub-register of DstReg. |
416 | 6.25M | // FIXME: Coalescer should support subregs symmetrically. |
417 | 6.25M | if (6.25M DstIdx && 6.25M !SrcIdx435k ) { |
418 | 434k | std::swap(Src, Dst); |
419 | 434k | std::swap(SrcIdx, DstIdx); |
420 | 434k | Flipped = !Flipped; |
421 | 434k | } |
422 | 6.25M | |
423 | 3.52M | CrossClass = NewRC != DstRC || NewRC != SrcRC; |
424 | 6.33M | } |
425 | 18.2M | // Check our invariants |
426 | 18.0M | assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); |
427 | 18.0M | assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && |
428 | 18.0M | "Cannot have a physical SubIdx"); |
429 | 18.0M | SrcReg = Src; |
430 | 18.0M | DstReg = Dst; |
431 | 18.0M | return true; |
432 | 18.5M | } |
433 | | |
434 | 1.49M | bool CoalescerPair::flip() { |
435 | 1.49M | if (TargetRegisterInfo::isPhysicalRegister(DstReg)) |
436 | 0 | return false; |
437 | 1.49M | std::swap(SrcReg, DstReg); |
438 | 1.49M | std::swap(SrcIdx, DstIdx); |
439 | 1.49M | Flipped = !Flipped; |
440 | 1.49M | return true; |
441 | 1.49M | } |
442 | | |
443 | 10.4M | bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { |
444 | 10.4M | if (!MI) |
445 | 101k | return false; |
446 | 10.3M | unsigned Src, Dst, SrcSub, DstSub; |
447 | 10.3M | if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) |
448 | 2.82M | return false; |
449 | 7.51M | |
450 | 7.51M | // Find the virtual register that is SrcReg. |
451 | 7.51M | if (7.51M Dst == SrcReg7.51M ) { |
452 | 2.29M | std::swap(Src, Dst); |
453 | 2.29M | std::swap(SrcSub, DstSub); |
454 | 7.51M | } else if (5.22M Src != SrcReg5.22M ) { |
455 | 274k | return false; |
456 | 274k | } |
457 | 7.23M | |
458 | 7.23M | // Now check that Dst matches DstReg. |
459 | 7.23M | if (7.23M TargetRegisterInfo::isPhysicalRegister(DstReg)7.23M ) { |
460 | 331k | if (!TargetRegisterInfo::isPhysicalRegister(Dst)) |
461 | 3.09k | return false; |
462 | 331k | assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); |
463 | 328k | // DstSub could be set for a physreg from INSERT_SUBREG. |
464 | 328k | if (DstSub) |
465 | 0 | Dst = TRI.getSubReg(Dst, DstSub); |
466 | 328k | // Full copy of Src. |
467 | 328k | if (!SrcSub) |
468 | 315k | return DstReg == Dst; |
469 | 13.2k | // This is a partial register copy. Check that the parts match. |
470 | 13.2k | return TRI.getSubReg(DstReg, SrcSub) == Dst; |
471 | 0 | } else { |
472 | 6.90M | // DstReg is virtual. |
473 | 6.90M | if (DstReg != Dst) |
474 | 173k | return false; |
475 | 6.73M | // Registers match, do the subregisters line up? |
476 | 6.73M | return TRI.composeSubRegIndices(SrcIdx, SrcSub) == |
477 | 6.73M | TRI.composeSubRegIndices(DstIdx, DstSub); |
478 | 6.73M | } |
479 | 10.4M | } |
480 | | |
481 | 32.0k | void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { |
482 | 32.0k | AU.setPreservesCFG(); |
483 | 32.0k | AU.addRequired<AAResultsWrapperPass>(); |
484 | 32.0k | AU.addRequired<LiveIntervals>(); |
485 | 32.0k | AU.addPreserved<LiveIntervals>(); |
486 | 32.0k | AU.addPreserved<SlotIndexes>(); |
487 | 32.0k | AU.addRequired<MachineLoopInfo>(); |
488 | 32.0k | AU.addPreserved<MachineLoopInfo>(); |
489 | 32.0k | AU.addPreservedID(MachineDominatorsID); |
490 | 32.0k | MachineFunctionPass::getAnalysisUsage(AU); |
491 | 32.0k | } |
492 | | |
493 | 494k | void RegisterCoalescer::eliminateDeadDefs() { |
494 | 494k | SmallVector<unsigned, 8> NewRegs; |
495 | 494k | LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, |
496 | 494k | nullptr, this).eliminateDeadDefs(DeadDefs); |
497 | 494k | } |
498 | | |
499 | 494k | void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { |
500 | 494k | // MI may be in WorkList. Make sure we don't visit it. |
501 | 494k | ErasedInstrs.insert(MI); |
502 | 494k | } |
503 | | |
504 | | bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, |
505 | 390k | MachineInstr *CopyMI) { |
506 | 390k | assert(!CP.isPartial() && "This doesn't work for partial copies."); |
507 | 390k | assert(!CP.isPhys() && "This doesn't work for physreg copies."); |
508 | 390k | |
509 | 390k | LiveInterval &IntA = |
510 | 390k | LIS->getInterval(CP.isFlipped() ? CP.getDstReg()109k : CP.getSrcReg()280k ); |
511 | 390k | LiveInterval &IntB = |
512 | 390k | LIS->getInterval(CP.isFlipped() ? CP.getSrcReg()109k : CP.getDstReg()280k ); |
513 | 390k | SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); |
514 | 390k | |
515 | 390k | // We have a non-trivially-coalescable copy with IntA being the source and |
516 | 390k | // IntB being the dest, thus this defines a value number in IntB. If the |
517 | 390k | // source value number (in IntA) is defined by a copy from B, see if we can |
518 | 390k | // merge these two pieces of B into a single value number, eliminating a copy. |
519 | 390k | // For example: |
520 | 390k | // |
521 | 390k | // A3 = B0 |
522 | 390k | // ... |
523 | 390k | // B1 = A3 <- this copy |
524 | 390k | // |
525 | 390k | // In this case, B0 can be extended to where the B1 copy lives, allowing the |
526 | 390k | // B1 value number to be replaced with B0 (which simplifies the B |
527 | 390k | // liveinterval). |
528 | 390k | |
529 | 390k | // BValNo is a value number in B that is defined by a copy from A. 'B1' in |
530 | 390k | // the example above. |
531 | 390k | LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); |
532 | 390k | if (BS == IntB.end()390k ) return false0 ; |
533 | 390k | VNInfo *BValNo = BS->valno; |
534 | 390k | |
535 | 390k | // Get the location that B is defined at. Two options: either this value has |
536 | 390k | // an unknown definition point or it is defined at CopyIdx. If unknown, we |
537 | 390k | // can't process it. |
538 | 390k | if (BValNo->def != CopyIdx390k ) return false0 ; |
539 | 390k | |
540 | 390k | // AValNo is the value number in A that defines the copy, A3 in the example. |
541 | 390k | SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); |
542 | 390k | LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); |
543 | 390k | // The live segment might not exist after fun with physreg coalescing. |
544 | 390k | if (AS == IntA.end()390k ) return false0 ; |
545 | 390k | VNInfo *AValNo = AS->valno; |
546 | 390k | |
547 | 390k | // If AValNo is defined as a copy from IntB, we can potentially process this. |
548 | 390k | // Get the instruction that defines this value number. |
549 | 390k | MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); |
550 | 390k | // Don't allow any partial copies, even if isCoalescable() allows them. |
551 | 390k | if (!CP.isCoalescable(ACopyMI) || 390k !ACopyMI->isFullCopy()736 ) |
552 | 390k | return false; |
553 | 736 | |
554 | 736 | // Get the Segment in IntB that this value number starts with. |
555 | 736 | LiveInterval::iterator ValS = |
556 | 736 | IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); |
557 | 736 | if (ValS == IntB.end()) |
558 | 0 | return false; |
559 | 736 | |
560 | 736 | // Make sure that the end of the live segment is inside the same block as |
561 | 736 | // CopyMI. |
562 | 736 | MachineInstr *ValSEndInst = |
563 | 736 | LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); |
564 | 736 | if (!ValSEndInst || 736 ValSEndInst->getParent() != CopyMI->getParent()736 ) |
565 | 659 | return false; |
566 | 77 | |
567 | 77 | // Okay, we now know that ValS ends in the same block that the CopyMI |
568 | 77 | // live-range starts. If there are no intervening live segments between them |
569 | 77 | // in IntB, we can merge them. |
570 | 77 | if (77 ValS+1 != BS77 ) return false0 ; |
571 | 77 | |
572 | 77 | DEBUG77 (dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); |
573 | 77 | |
574 | 77 | SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; |
575 | 77 | // We are about to delete CopyMI, so need to remove it as the 'instruction |
576 | 77 | // that defines this value #'. Update the valnum with the new defining |
577 | 77 | // instruction #. |
578 | 77 | BValNo->def = FillerStart; |
579 | 77 | |
580 | 77 | // Okay, we can merge them. We need to insert a new liverange: |
581 | 77 | // [ValS.end, BS.begin) of either value number, then we merge the |
582 | 77 | // two value numbers. |
583 | 77 | IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); |
584 | 77 | |
585 | 77 | // Okay, merge "B1" into the same value number as "B0". |
586 | 77 | if (BValNo != ValS->valno) |
587 | 77 | IntB.MergeValueNumberInto(BValNo, ValS->valno); |
588 | 77 | |
589 | 77 | // Do the same for the subregister segments. |
590 | 0 | for (LiveInterval::SubRange &S : IntB.subranges()) { |
591 | 0 | VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); |
592 | 0 | S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); |
593 | 0 | VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); |
594 | 0 | if (SubBValNo != SubValSNo) |
595 | 0 | S.MergeValueNumberInto(SubBValNo, SubValSNo); |
596 | 0 | } |
597 | 77 | |
598 | 77 | DEBUG(dbgs() << " result = " << IntB << '\n'); |
599 | 77 | |
600 | 77 | // If the source instruction was killing the source register before the |
601 | 77 | // merge, unset the isKill marker given the live range has been extended. |
602 | 77 | int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); |
603 | 77 | if (UIdx != -177 ) { |
604 | 0 | ValSEndInst->getOperand(UIdx).setIsKill(false); |
605 | 0 | } |
606 | 77 | |
607 | 77 | // Rewrite the copy. If the copy instruction was killing the destination |
608 | 77 | // register before the merge, find the last use and trim the live range. That |
609 | 77 | // will also add the isKill marker. |
610 | 77 | CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); |
611 | 77 | if (AS->end == CopyIdx) |
612 | 0 | shrinkToUses(&IntA); |
613 | 390k | |
614 | 390k | ++numExtends; |
615 | 390k | return true; |
616 | 390k | } |
617 | | |
618 | | bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, |
619 | | LiveInterval &IntB, |
620 | | VNInfo *AValNo, |
621 | 365 | VNInfo *BValNo) { |
622 | 365 | // If AValNo has PHI kills, conservatively assume that IntB defs can reach |
623 | 365 | // the PHI values. |
624 | 365 | if (LIS->hasPHIKill(IntA, AValNo)) |
625 | 4 | return true; |
626 | 361 | |
627 | 361 | for (LiveRange::Segment &ASeg : IntA.segments) 361 { |
628 | 2.00k | if (ASeg.valno != AValNo2.00k ) continue1.62k ; |
629 | 380 | LiveInterval::iterator BI = |
630 | 380 | std::upper_bound(IntB.begin(), IntB.end(), ASeg.start); |
631 | 380 | if (BI != IntB.begin()) |
632 | 380 | --BI; |
633 | 1.12k | for (; BI != IntB.end() && 1.12k ASeg.end >= BI->start1.05k ; ++BI742 ) { |
634 | 749 | if (BI->valno == BValNo) |
635 | 380 | continue; |
636 | 369 | if (369 BI->start <= ASeg.start && 369 BI->end > ASeg.start361 ) |
637 | 0 | return true; |
638 | 369 | if (369 BI->start > ASeg.start && 369 BI->start < ASeg.end8 ) |
639 | 7 | return true; |
640 | 749 | } |
641 | 2.00k | } |
642 | 354 | return false; |
643 | 365 | } |
644 | | |
645 | | /// Copy segements with value number @p SrcValNo from liverange @p Src to live |
646 | | /// range @Dst and use value number @p DstValNo there. |
647 | | static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, |
648 | 352 | const LiveRange &Src, const VNInfo *SrcValNo) { |
649 | 1.96k | for (const LiveRange::Segment &S : Src.segments) { |
650 | 1.96k | if (S.valno != SrcValNo) |
651 | 1.59k | continue; |
652 | 371 | Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); |
653 | 371 | } |
654 | 352 | } |
655 | | |
656 | | bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, |
657 | 390k | MachineInstr *CopyMI) { |
658 | 390k | assert(!CP.isPhys()); |
659 | 390k | |
660 | 390k | LiveInterval &IntA = |
661 | 390k | LIS->getInterval(CP.isFlipped() ? CP.getDstReg()109k : CP.getSrcReg()280k ); |
662 | 390k | LiveInterval &IntB = |
663 | 390k | LIS->getInterval(CP.isFlipped() ? CP.getSrcReg()109k : CP.getDstReg()280k ); |
664 | 390k | |
665 | 390k | // We found a non-trivially-coalescable copy with IntA being the source and |
666 | 390k | // IntB being the dest, thus this defines a value number in IntB. If the |
667 | 390k | // source value number (in IntA) is defined by a commutable instruction and |
668 | 390k | // its other operand is coalesced to the copy dest register, see if we can |
669 | 390k | // transform the copy into a noop by commuting the definition. For example, |
670 | 390k | // |
671 | 390k | // A3 = op A2 B0<kill> |
672 | 390k | // ... |
673 | 390k | // B1 = A3 <- this copy |
674 | 390k | // ... |
675 | 390k | // = op A3 <- more uses |
676 | 390k | // |
677 | 390k | // ==> |
678 | 390k | // |
679 | 390k | // B2 = op B0 A2<kill> |
680 | 390k | // ... |
681 | 390k | // B1 = B2 <- now an identity copy |
682 | 390k | // ... |
683 | 390k | // = op B2 <- more uses |
684 | 390k | |
685 | 390k | // BValNo is a value number in B that is defined by a copy from A. 'B1' in |
686 | 390k | // the example above. |
687 | 390k | SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); |
688 | 390k | VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); |
689 | 390k | assert(BValNo != nullptr && BValNo->def == CopyIdx); |
690 | 390k | |
691 | 390k | // AValNo is the value number in A that defines the copy, A3 in the example. |
692 | 390k | VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); |
693 | 390k | assert(AValNo && !AValNo->isUnused() && "COPY source not live"); |
694 | 390k | if (AValNo->isPHIDef()) |
695 | 101k | return false; |
696 | 288k | MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); |
697 | 288k | if (!DefMI) |
698 | 0 | return false; |
699 | 288k | if (288k !DefMI->isCommutable()288k ) |
700 | 281k | return false; |
701 | 7.03k | // If DefMI is a two-address instruction then commuting it will change the |
702 | 7.03k | // destination register. |
703 | 7.03k | int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); |
704 | 7.03k | assert(DefIdx != -1); |
705 | 7.03k | unsigned UseOpIdx; |
706 | 7.03k | if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) |
707 | 354 | return false; |
708 | 6.67k | |
709 | 6.67k | // FIXME: The code below tries to commute 'UseOpIdx' operand with some other |
710 | 6.67k | // commutable operand which is expressed by 'CommuteAnyOperandIndex'value |
711 | 6.67k | // passed to the method. That _other_ operand is chosen by |
712 | 6.67k | // the findCommutedOpIndices() method. |
713 | 6.67k | // |
714 | 6.67k | // That is obviously an area for improvement in case of instructions having |
715 | 6.67k | // more than 2 operands. For example, if some instruction has 3 commutable |
716 | 6.67k | // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, |
717 | 6.67k | // op#2<->op#3) of commute transformation should be considered/tried here. |
718 | 6.67k | unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; |
719 | 6.67k | if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx)) |
720 | 114 | return false; |
721 | 6.56k | |
722 | 6.56k | MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); |
723 | 6.56k | unsigned NewReg = NewDstMO.getReg(); |
724 | 6.56k | if (NewReg != IntB.reg || 6.56k !IntB.Query(AValNo->def).isKill()426 ) |
725 | 6.19k | return false; |
726 | 365 | |
727 | 365 | // Make sure there are no other definitions of IntB that would reach the |
728 | 365 | // uses which the new definition can reach. |
729 | 365 | if (365 hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)365 ) |
730 | 11 | return false; |
731 | 354 | |
732 | 354 | // If some of the uses of IntA.reg is already coalesced away, return false. |
733 | 354 | // It's not possible to determine whether it's safe to perform the coalescing. |
734 | 354 | for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) 354 { |
735 | 1.70k | MachineInstr *UseMI = MO.getParent(); |
736 | 1.70k | unsigned OpNo = &MO - &UseMI->getOperand(0); |
737 | 1.70k | SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); |
738 | 1.70k | LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); |
739 | 1.70k | if (US == IntA.end() || 1.70k US->valno != AValNo1.70k ) |
740 | 1.30k | continue; |
741 | 406 | // If this use is tied to a def, we can't rewrite the register. |
742 | 406 | if (406 UseMI->isRegTiedToDefOperand(OpNo)406 ) |
743 | 0 | return false; |
744 | 354 | } |
745 | 354 | |
746 | 354 | DEBUG354 (dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' |
747 | 354 | << *DefMI); |
748 | 354 | |
749 | 354 | // At this point we have decided that it is legal to do this |
750 | 354 | // transformation. Start by commuting the instruction. |
751 | 354 | MachineBasicBlock *MBB = DefMI->getParent(); |
752 | 354 | MachineInstr *NewMI = |
753 | 354 | TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); |
754 | 354 | if (!NewMI) |
755 | 2 | return false; |
756 | 352 | if (352 TargetRegisterInfo::isVirtualRegister(IntA.reg) && |
757 | 352 | TargetRegisterInfo::isVirtualRegister(IntB.reg) && |
758 | 352 | !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) |
759 | 0 | return false; |
760 | 352 | if (352 NewMI != DefMI352 ) { |
761 | 0 | LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); |
762 | 0 | MachineBasicBlock::iterator Pos = DefMI; |
763 | 0 | MBB->insert(Pos, NewMI); |
764 | 0 | MBB->erase(DefMI); |
765 | 0 | } |
766 | 352 | |
767 | 352 | // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. |
768 | 352 | // A = or A, B |
769 | 352 | // ... |
770 | 352 | // B = A |
771 | 352 | // ... |
772 | 352 | // C = A<kill> |
773 | 352 | // ... |
774 | 352 | // = B |
775 | 352 | |
776 | 352 | // Update uses of IntA of the specific Val# with IntB. |
777 | 352 | for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), |
778 | 352 | UE = MRI->use_end(); |
779 | 2.05k | UI != UE2.05k ; /* ++UI is below because of possible MI removal */) { |
780 | 1.70k | MachineOperand &UseMO = *UI; |
781 | 1.70k | ++UI; |
782 | 1.70k | if (UseMO.isUndef()) |
783 | 0 | continue; |
784 | 1.70k | MachineInstr *UseMI = UseMO.getParent(); |
785 | 1.70k | if (UseMI->isDebugValue()1.70k ) { |
786 | 0 | // FIXME These don't have an instruction index. Not clear we have enough |
787 | 0 | // info to decide whether to do this replacement or not. For now do it. |
788 | 0 | UseMO.setReg(NewReg); |
789 | 0 | continue; |
790 | 0 | } |
791 | 1.70k | SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true); |
792 | 1.70k | LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); |
793 | 1.70k | assert(US != IntA.end() && "Use must be live"); |
794 | 1.70k | if (US->valno != AValNo) |
795 | 1.29k | continue; |
796 | 402 | // Kill flags are no longer accurate. They are recomputed after RA. |
797 | 402 | UseMO.setIsKill(false); |
798 | 402 | if (TargetRegisterInfo::isPhysicalRegister(NewReg)) |
799 | 0 | UseMO.substPhysReg(NewReg, *TRI); |
800 | 402 | else |
801 | 402 | UseMO.setReg(NewReg); |
802 | 402 | if (UseMI == CopyMI) |
803 | 352 | continue; |
804 | 50 | if (50 !UseMI->isCopy()50 ) |
805 | 25 | continue; |
806 | 25 | if (25 UseMI->getOperand(0).getReg() != IntB.reg || |
807 | 1 | UseMI->getOperand(0).getSubReg()) |
808 | 24 | continue; |
809 | 1 | |
810 | 1 | // This copy will become a noop. If it's defining a new val#, merge it into |
811 | 1 | // BValNo. |
812 | 1 | SlotIndex DefIdx = UseIdx.getRegSlot(); |
813 | 1 | VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); |
814 | 1 | if (!DVNI) |
815 | 0 | continue; |
816 | 1 | DEBUG1 (dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); |
817 | 1 | assert(DVNI->def == DefIdx); |
818 | 1 | BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); |
819 | 0 | for (LiveInterval::SubRange &S : IntB.subranges()) { |
820 | 0 | VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); |
821 | 0 | if (!SubDVNI) |
822 | 0 | continue; |
823 | 0 | VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); |
824 | 0 | assert(SubBValNo->def == CopyIdx); |
825 | 0 | S.MergeValueNumberInto(SubDVNI, SubBValNo); |
826 | 0 | } |
827 | 1.70k | |
828 | 1.70k | deleteInstr(UseMI); |
829 | 1.70k | } |
830 | 352 | |
831 | 352 | // Extend BValNo by merging in IntA live segments of AValNo. Val# definition |
832 | 352 | // is updated. |
833 | 352 | BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); |
834 | 352 | if (IntB.hasSubRanges()352 ) { |
835 | 0 | if (!IntA.hasSubRanges()0 ) { |
836 | 0 | LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); |
837 | 0 | IntA.createSubRangeFrom(Allocator, Mask, IntA); |
838 | 0 | } |
839 | 0 | SlotIndex AIdx = CopyIdx.getRegSlot(true); |
840 | 0 | for (LiveInterval::SubRange &SA : IntA.subranges()) { |
841 | 0 | VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); |
842 | 0 | assert(ASubValNo != nullptr); |
843 | 0 |
|
844 | 0 | IntB.refineSubRanges(Allocator, SA.LaneMask, |
845 | 0 | [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) { |
846 | 0 | VNInfo *BSubValNo = SR.empty() |
847 | 0 | ? SR.getNextValue(CopyIdx, Allocator) |
848 | 0 | : SR.getVNInfoAt(CopyIdx); |
849 | 0 | assert(BSubValNo != nullptr); |
850 | 0 | addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); |
851 | 0 | }); |
852 | 0 | } |
853 | 0 | } |
854 | 352 | |
855 | 352 | BValNo->def = AValNo->def; |
856 | 352 | addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); |
857 | 352 | DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); |
858 | 352 | |
859 | 352 | LIS->removeVRegDefAt(IntA, AValNo->def); |
860 | 352 | |
861 | 352 | DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); |
862 | 390k | ++numCommutes; |
863 | 390k | return true; |
864 | 390k | } |
865 | | |
866 | | /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a |
867 | | /// predecessor of BB2, and if B is not redefined on the way from A = B |
868 | | /// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the |
869 | | /// execution goes through the path from BB0 to BB2. We may move B = A |
870 | | /// to the predecessor without such reversed copy. |
871 | | /// So we will transform the program from: |
872 | | /// BB0: |
873 | | /// A = B; BB1: |
874 | | /// ... ... |
875 | | /// / \ / |
876 | | /// BB2: |
877 | | /// ... |
878 | | /// B = A; |
879 | | /// |
880 | | /// to: |
881 | | /// |
882 | | /// BB0: BB1: |
883 | | /// A = B; ... |
884 | | /// ... B = A; |
885 | | /// / \ / |
886 | | /// BB2: |
887 | | /// ... |
888 | | /// |
889 | | /// A special case is when BB0 and BB2 are the same BB which is the only |
890 | | /// BB in a loop: |
891 | | /// BB1: |
892 | | /// ... |
893 | | /// BB0/BB2: ---- |
894 | | /// B = A; | |
895 | | /// ... | |
896 | | /// A = B; | |
897 | | /// |------- |
898 | | /// | |
899 | | /// We may hoist B = A from BB0/BB2 to BB1. |
900 | | /// |
901 | | /// The major preconditions for correctness to remove such partial |
902 | | /// redundancy include: |
903 | | /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of |
904 | | /// the PHI is defined by the reversed copy A = B in BB0. |
905 | | /// 2. No B is referenced from the start of BB2 to B = A. |
906 | | /// 3. No B is defined from A = B to the end of BB0. |
907 | | /// 4. BB1 has only one successor. |
908 | | /// |
909 | | /// 2 and 4 implicitly ensure B is not live at the end of BB1. |
910 | | /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a |
911 | | /// colder place, which not only prevent endless loop, but also make sure |
912 | | /// the movement of copy is beneficial. |
913 | | bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, |
914 | 390k | MachineInstr &CopyMI) { |
915 | 390k | assert(!CP.isPhys()); |
916 | 390k | if (!CopyMI.isFullCopy()) |
917 | 0 | return false; |
918 | 390k | |
919 | 390k | MachineBasicBlock &MBB = *CopyMI.getParent(); |
920 | 390k | if (MBB.isEHPad()) |
921 | 160 | return false; |
922 | 390k | |
923 | 390k | if (390k MBB.pred_size() != 2390k ) |
924 | 226k | return false; |
925 | 163k | |
926 | 163k | LiveInterval &IntA = |
927 | 163k | LIS->getInterval(CP.isFlipped() ? CP.getDstReg()39.4k : CP.getSrcReg()124k ); |
928 | 163k | LiveInterval &IntB = |
929 | 163k | LIS->getInterval(CP.isFlipped() ? CP.getSrcReg()39.4k : CP.getDstReg()124k ); |
930 | 163k | |
931 | 163k | // A is defined by PHI at the entry of MBB. |
932 | 163k | SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); |
933 | 163k | VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx); |
934 | 163k | assert(AValNo && !AValNo->isUnused() && "COPY source not live"); |
935 | 163k | if (!AValNo->isPHIDef()) |
936 | 114k | return false; |
937 | 48.9k | |
938 | 48.9k | // No B is referenced before CopyMI in MBB. |
939 | 48.9k | if (48.9k IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx)48.9k ) |
940 | 950 | return false; |
941 | 48.0k | |
942 | 48.0k | // MBB has two predecessors: one contains A = B so no copy will be inserted |
943 | 48.0k | // for it. The other one will have a copy moved from MBB. |
944 | 48.0k | bool FoundReverseCopy = false; |
945 | 48.0k | MachineBasicBlock *CopyLeftBB = nullptr; |
946 | 96.0k | for (MachineBasicBlock *Pred : MBB.predecessors()) { |
947 | 96.0k | VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred)); |
948 | 96.0k | MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def); |
949 | 96.0k | if (!DefMI || 96.0k !DefMI->isFullCopy()89.5k ) { |
950 | 46.6k | CopyLeftBB = Pred; |
951 | 46.6k | continue; |
952 | 46.6k | } |
953 | 49.3k | // Check DefMI is a reverse copy and it is in BB Pred. |
954 | 49.3k | if (49.3k DefMI->getOperand(0).getReg() != IntA.reg || |
955 | 49.3k | DefMI->getOperand(1).getReg() != IntB.reg || |
956 | 49.3k | DefMI->getParent() != Pred861 ) { |
957 | 48.5k | CopyLeftBB = Pred; |
958 | 48.5k | continue; |
959 | 48.5k | } |
960 | 843 | // If there is any other def of B after DefMI and before the end of Pred, |
961 | 843 | // we need to keep the copy of B = A at the end of Pred if we remove |
962 | 843 | // B = A from MBB. |
963 | 843 | bool ValB_Changed = false; |
964 | 4.60k | for (auto VNI : IntB.valnos) { |
965 | 4.60k | if (VNI->isUnused()) |
966 | 114 | continue; |
967 | 4.48k | if (4.48k PVal->def < VNI->def && 4.48k VNI->def < LIS->getMBBEndIdx(Pred)1.40k ) { |
968 | 4 | ValB_Changed = true; |
969 | 4 | break; |
970 | 4 | } |
971 | 843 | } |
972 | 843 | if (ValB_Changed843 ) { |
973 | 4 | CopyLeftBB = Pred; |
974 | 4 | continue; |
975 | 4 | } |
976 | 839 | FoundReverseCopy = true; |
977 | 839 | } |
978 | 48.0k | |
979 | 48.0k | // If no reverse copy is found in predecessors, nothing to do. |
980 | 48.0k | if (!FoundReverseCopy) |
981 | 47.1k | return false; |
982 | 833 | |
983 | 833 | // If CopyLeftBB is nullptr, it means every predecessor of MBB contains |
984 | 833 | // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated. |
985 | 833 | // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and |
986 | 833 | // update IntA/IntB. |
987 | 833 | // |
988 | 833 | // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so |
989 | 833 | // MBB is hotter than CopyLeftBB. |
990 | 833 | if (833 CopyLeftBB && 833 CopyLeftBB->succ_size() > 1827 ) |
991 | 346 | return false; |
992 | 487 | |
993 | 487 | // Now ok to move copy. |
994 | 487 | if (487 CopyLeftBB487 ) { |
995 | 481 | DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#" |
996 | 481 | << CopyLeftBB->getNumber() << '\t' << CopyMI); |
997 | 481 | |
998 | 481 | // Insert new copy to CopyLeftBB. |
999 | 481 | auto InsPos = CopyLeftBB->getFirstTerminator(); |
1000 | 481 | MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), |
1001 | 481 | TII->get(TargetOpcode::COPY), IntB.reg) |
1002 | 481 | .addReg(IntA.reg); |
1003 | 481 | SlotIndex NewCopyIdx = |
1004 | 481 | LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); |
1005 | 481 | IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); |
1006 | 481 | for (LiveInterval::SubRange &SR : IntB.subranges()) |
1007 | 0 | SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); |
1008 | 481 | |
1009 | 481 | // If the newly created Instruction has an address of an instruction that was |
1010 | 481 | // deleted before (object recycled by the allocator) it needs to be removed from |
1011 | 481 | // the deleted list. |
1012 | 481 | ErasedInstrs.erase(NewCopyMI); |
1013 | 487 | } else { |
1014 | 6 | DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#" |
1015 | 6 | << MBB.getNumber() << '\t' << CopyMI); |
1016 | 6 | } |
1017 | 487 | |
1018 | 487 | // Remove CopyMI. |
1019 | 487 | // Note: This is fine to remove the copy before updating the live-ranges. |
1020 | 487 | // While updating the live-ranges, we only look at slot indices and |
1021 | 487 | // never go back to the instruction. |
1022 | 487 | // Mark instructions as deleted. |
1023 | 487 | deleteInstr(&CopyMI); |
1024 | 487 | |
1025 | 487 | // Update the liveness. |
1026 | 487 | SmallVector<SlotIndex, 8> EndPoints; |
1027 | 487 | VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead(); |
1028 | 487 | LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(), |
1029 | 487 | &EndPoints); |
1030 | 487 | BValNo->markUnused(); |
1031 | 487 | // Extend IntB to the EndPoints of its original live interval. |
1032 | 487 | LIS->extendToIndices(IntB, EndPoints); |
1033 | 487 | |
1034 | 487 | // Now, do the same for its subranges. |
1035 | 0 | for (LiveInterval::SubRange &SR : IntB.subranges()) { |
1036 | 0 | EndPoints.clear(); |
1037 | 0 | VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead(); |
1038 | 0 | assert(BValNo && "All sublanes should be live"); |
1039 | 0 | LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints); |
1040 | 0 | BValNo->markUnused(); |
1041 | 0 | LIS->extendToIndices(SR, EndPoints); |
1042 | 0 | } |
1043 | 390k | |
1044 | 390k | // Finally, update the live-range of IntA. |
1045 | 390k | shrinkToUses(&IntA); |
1046 | 390k | return true; |
1047 | 390k | } |
1048 | | |
1049 | | /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just |
1050 | | /// defining a subregister. |
1051 | 724k | static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { |
1052 | 724k | assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && |
1053 | 724k | "This code cannot handle physreg aliasing"); |
1054 | 726k | for (const MachineOperand &Op : MI.operands()) { |
1055 | 726k | if (!Op.isReg() || 726k !Op.isDef()725k || Op.getReg() != Reg724k ) |
1056 | 1.48k | continue; |
1057 | 724k | // Return true if we define the full register or don't care about the value |
1058 | 724k | // inside other subregisters. |
1059 | 724k | if (724k Op.getSubReg() == 0 || 724k Op.isUndef()153k ) |
1060 | 724k | return true; |
1061 | 742 | } |
1062 | 742 | return false; |
1063 | 742 | } |
1064 | | |
1065 | | bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, |
1066 | | MachineInstr *CopyMI, |
1067 | 11.1M | bool &IsDefCopy) { |
1068 | 11.1M | IsDefCopy = false; |
1069 | 11.1M | unsigned SrcReg = CP.isFlipped() ? CP.getDstReg()2.62M : CP.getSrcReg()8.55M ; |
1070 | 11.1M | unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx()2.62M : CP.getSrcIdx()8.55M ; |
1071 | 11.1M | unsigned DstReg = CP.isFlipped() ? CP.getSrcReg()2.62M : CP.getDstReg()8.55M ; |
1072 | 11.1M | unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx()2.62M : CP.getDstIdx()8.55M ; |
1073 | 11.1M | if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) |
1074 | 2.50M | return false; |
1075 | 8.67M | |
1076 | 8.67M | LiveInterval &SrcInt = LIS->getInterval(SrcReg); |
1077 | 8.67M | SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); |
1078 | 8.67M | VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); |
1079 | 8.67M | assert(ValNo && "CopyMI input register not live"); |
1080 | 8.67M | if (ValNo->isPHIDef() || 8.67M ValNo->isUnused()8.26M ) |
1081 | 414k | return false; |
1082 | 8.26M | MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); |
1083 | 8.26M | if (!DefMI) |
1084 | 0 | return false; |
1085 | 8.26M | if (8.26M DefMI->isCopyLike()8.26M ) { |
1086 | 4.84M | IsDefCopy = true; |
1087 | 4.84M | return false; |
1088 | 4.84M | } |
1089 | 3.42M | if (3.42M !TII->isAsCheapAsAMove(*DefMI)3.42M ) |
1090 | 1.83M | return false; |
1091 | 1.58M | if (1.58M !TII->isTriviallyReMaterializable(*DefMI, AA)1.58M ) |
1092 | 858k | return false; |
1093 | 724k | if (724k !definesFullReg(*DefMI, SrcReg)724k ) |
1094 | 742 | return false; |
1095 | 724k | bool SawStore = false; |
1096 | 724k | if (!DefMI->isSafeToMove(AA, SawStore)) |
1097 | 0 | return false; |
1098 | 724k | const MCInstrDesc &MCID = DefMI->getDesc(); |
1099 | 724k | if (MCID.getNumDefs() != 1) |
1100 | 0 | return false; |
1101 | 724k | // Only support subregister destinations when the def is read-undef. |
1102 | 724k | MachineOperand &DstOperand = CopyMI->getOperand(0); |
1103 | 724k | unsigned CopyDstReg = DstOperand.getReg(); |
1104 | 724k | if (DstOperand.getSubReg() && 724k !DstOperand.isUndef()377 ) |
1105 | 4 | return false; |
1106 | 724k | |
1107 | 724k | // If both SrcIdx and DstIdx are set, correct rematerialization would widen |
1108 | 724k | // the register substantially (beyond both source and dest size). This is bad |
1109 | 724k | // for performance since it can cascade through a function, introducing many |
1110 | 724k | // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers |
1111 | 724k | // around after a few subreg copies). |
1112 | 724k | if (724k SrcIdx && 724k DstIdx304 ) |
1113 | 0 | return false; |
1114 | 724k | |
1115 | 724k | const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); |
1116 | 724k | if (!DefMI->isImplicitDef()724k ) { |
1117 | 724k | if (TargetRegisterInfo::isPhysicalRegister(DstReg)724k ) { |
1118 | 690k | unsigned NewDstReg = DstReg; |
1119 | 690k | |
1120 | 690k | unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), |
1121 | 690k | DefMI->getOperand(0).getSubReg()); |
1122 | 690k | if (NewDstIdx) |
1123 | 138k | NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); |
1124 | 690k | |
1125 | 690k | // Finally, make sure that the physical subregister that will be |
1126 | 690k | // constructed later is permitted for the instruction. |
1127 | 690k | if (!DefRC->contains(NewDstReg)) |
1128 | 0 | return false; |
1129 | 33.6k | } else { |
1130 | 33.6k | // Theoretically, some stack frame reference could exist. Just make sure |
1131 | 33.6k | // it hasn't actually happened. |
1132 | 33.6k | assert(TargetRegisterInfo::isVirtualRegister(DstReg) && |
1133 | 33.6k | "Only expect to deal with virtual or physical registers"); |
1134 | 33.6k | } |
1135 | 724k | } |
1136 | 724k | |
1137 | 724k | DebugLoc DL = CopyMI->getDebugLoc(); |
1138 | 724k | MachineBasicBlock *MBB = CopyMI->getParent(); |
1139 | 724k | MachineBasicBlock::iterator MII = |
1140 | 724k | std::next(MachineBasicBlock::iterator(CopyMI)); |
1141 | 724k | TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI); |
1142 | 724k | MachineInstr &NewMI = *std::prev(MII); |
1143 | 724k | NewMI.setDebugLoc(DL); |
1144 | 724k | |
1145 | 724k | // In a situation like the following: |
1146 | 724k | // %vreg0:subreg = instr ; DefMI, subreg = DstIdx |
1147 | 724k | // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 |
1148 | 724k | // instead of widening %vreg1 to the register class of %vreg0 simply do: |
1149 | 724k | // %vreg1 = instr |
1150 | 724k | const TargetRegisterClass *NewRC = CP.getNewRC(); |
1151 | 724k | if (DstIdx != 0724k ) { |
1152 | 291 | MachineOperand &DefMO = NewMI.getOperand(0); |
1153 | 291 | if (DefMO.getSubReg() == DstIdx291 ) { |
1154 | 239 | assert(SrcIdx == 0 && CP.isFlipped() |
1155 | 239 | && "Shouldn't have SrcIdx+DstIdx at this point"); |
1156 | 239 | const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); |
1157 | 239 | const TargetRegisterClass *CommonRC = |
1158 | 239 | TRI->getCommonSubClass(DefRC, DstRC); |
1159 | 239 | if (CommonRC != nullptr239 ) { |
1160 | 239 | NewRC = CommonRC; |
1161 | 239 | DstIdx = 0; |
1162 | 239 | DefMO.setSubReg(0); |
1163 | 239 | DefMO.setIsUndef(false); // Only subregs can have def+undef. |
1164 | 239 | } |
1165 | 239 | } |
1166 | 291 | } |
1167 | 724k | |
1168 | 724k | // CopyMI may have implicit operands, save them so that we can transfer them |
1169 | 724k | // over to the newly materialized instruction after CopyMI is removed. |
1170 | 724k | SmallVector<MachineOperand, 4> ImplicitOps; |
1171 | 724k | ImplicitOps.reserve(CopyMI->getNumOperands() - |
1172 | 724k | CopyMI->getDesc().getNumOperands()); |
1173 | 724k | for (unsigned I = CopyMI->getDesc().getNumOperands(), |
1174 | 724k | E = CopyMI->getNumOperands(); |
1175 | 724k | I != E724k ; ++I1 ) { |
1176 | 1 | MachineOperand &MO = CopyMI->getOperand(I); |
1177 | 1 | if (MO.isReg()1 ) { |
1178 | 1 | assert(MO.isImplicit() && "No explicit operands after implict operands."); |
1179 | 1 | // Discard VReg implicit defs. |
1180 | 1 | if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) |
1181 | 1 | ImplicitOps.push_back(MO); |
1182 | 1 | } |
1183 | 1 | } |
1184 | 724k | |
1185 | 724k | LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI); |
1186 | 724k | CopyMI->eraseFromParent(); |
1187 | 724k | ErasedInstrs.insert(CopyMI); |
1188 | 724k | |
1189 | 724k | // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). |
1190 | 724k | // We need to remember these so we can add intervals once we insert |
1191 | 724k | // NewMI into SlotIndexes. |
1192 | 724k | SmallVector<unsigned, 4> NewMIImplDefs; |
1193 | 724k | for (unsigned i = NewMI.getDesc().getNumOperands(), |
1194 | 724k | e = NewMI.getNumOperands(); |
1195 | 733k | i != e733k ; ++i9.14k ) { |
1196 | 9.14k | MachineOperand &MO = NewMI.getOperand(i); |
1197 | 9.14k | if (MO.isReg() && 9.14k MO.isDef()9.14k ) { |
1198 | 8.08k | assert(MO.isImplicit() && MO.isDead() && |
1199 | 8.08k | TargetRegisterInfo::isPhysicalRegister(MO.getReg())); |
1200 | 8.08k | NewMIImplDefs.push_back(MO.getReg()); |
1201 | 8.08k | } |
1202 | 9.14k | } |
1203 | 724k | |
1204 | 724k | if (TargetRegisterInfo::isVirtualRegister(DstReg)724k ) { |
1205 | 33.6k | unsigned NewIdx = NewMI.getOperand(0).getSubReg(); |
1206 | 33.6k | |
1207 | 33.6k | if (DefRC != nullptr33.6k ) { |
1208 | 33.6k | if (NewIdx) |
1209 | 14.5k | NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); |
1210 | 33.6k | else |
1211 | 19.0k | NewRC = TRI->getCommonSubClass(NewRC, DefRC); |
1212 | 33.6k | assert(NewRC && "subreg chosen for remat incompatible with instruction"); |
1213 | 33.6k | } |
1214 | 33.6k | // Remap subranges to new lanemask and change register class. |
1215 | 33.6k | LiveInterval &DstInt = LIS->getInterval(DstReg); |
1216 | 2 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1217 | 2 | SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask); |
1218 | 2 | } |
1219 | 33.6k | MRI->setRegClass(DstReg, NewRC); |
1220 | 33.6k | |
1221 | 33.6k | // Update machine operands and add flags. |
1222 | 33.6k | updateRegDefsUses(DstReg, DstReg, DstIdx); |
1223 | 33.6k | NewMI.getOperand(0).setSubReg(NewIdx); |
1224 | 33.6k | // Add dead subregister definitions if we are defining the whole register |
1225 | 33.6k | // but only part of it is live. |
1226 | 33.6k | // This could happen if the rematerialization instruction is rematerializing |
1227 | 33.6k | // more than actually is used in the register. |
1228 | 33.6k | // An example would be: |
1229 | 33.6k | // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs |
1230 | 33.6k | // ; Copying only part of the register here, but the rest is undef. |
1231 | 33.6k | // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit |
1232 | 33.6k | // ==> |
1233 | 33.6k | // ; Materialize all the constants but only using one |
1234 | 33.6k | // vreg2 = LOAD_CONSTANTS 5, 8 |
1235 | 33.6k | // |
1236 | 33.6k | // at this point for the part that wasn't defined before we could have |
1237 | 33.6k | // subranges missing the definition. |
1238 | 33.6k | if (NewIdx == 0 && 33.6k DstInt.hasSubRanges()19.0k ) { |
1239 | 0 | SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); |
1240 | 0 | SlotIndex DefIndex = |
1241 | 0 | CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); |
1242 | 0 | LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg); |
1243 | 0 | VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator(); |
1244 | 0 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1245 | 0 | if (!SR.liveAt(DefIndex)) |
1246 | 0 | SR.createDeadDef(DefIndex, Alloc); |
1247 | 0 | MaxMask &= ~SR.LaneMask; |
1248 | 0 | } |
1249 | 0 | if (MaxMask.any()0 ) { |
1250 | 0 | LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); |
1251 | 0 | SR->createDeadDef(DefIndex, Alloc); |
1252 | 0 | } |
1253 | 0 | } |
1254 | 33.6k | |
1255 | 33.6k | // Make sure that the subrange for resultant undef is removed |
1256 | 33.6k | // For example: |
1257 | 33.6k | // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1 |
1258 | 33.6k | // vreg2<def> = COPY vreg1 |
1259 | 33.6k | // ==> |
1260 | 33.6k | // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1 |
1261 | 33.6k | // ; Correct but need to remove the subrange for vreg2:sub0 |
1262 | 33.6k | // ; as it is now undef |
1263 | 33.6k | if (NewIdx != 0 && 33.6k DstInt.hasSubRanges()14.5k ) { |
1264 | 1 | // The affected subregister segments can be removed. |
1265 | 1 | SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); |
1266 | 1 | LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); |
1267 | 1 | bool UpdatedSubRanges = false; |
1268 | 2 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1269 | 2 | if ((SR.LaneMask & DstMask).none()2 ) { |
1270 | 1 | DEBUG(dbgs() << "Removing undefined SubRange " |
1271 | 1 | << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); |
1272 | 1 | // VNI is in ValNo - remove any segments in this SubRange that have this ValNo |
1273 | 1 | if (VNInfo *RmValNo1 = SR.getVNInfoAt(CurrIdx.getRegSlot())) { |
1274 | 1 | SR.removeValNo(RmValNo); |
1275 | 1 | UpdatedSubRanges = true; |
1276 | 1 | } |
1277 | 1 | } |
1278 | 2 | } |
1279 | 1 | if (UpdatedSubRanges) |
1280 | 1 | DstInt.removeEmptySubRanges(); |
1281 | 1 | } |
1282 | 724k | } else if (690k NewMI.getOperand(0).getReg() != CopyDstReg690k ) { |
1283 | 137k | // The New instruction may be defining a sub-register of what's actually |
1284 | 137k | // been asked for. If so it must implicitly define the whole thing. |
1285 | 137k | assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && |
1286 | 137k | "Only expect virtual or physical registers in remat"); |
1287 | 137k | NewMI.getOperand(0).setIsDead(true); |
1288 | 137k | NewMI.addOperand(MachineOperand::CreateReg( |
1289 | 137k | CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); |
1290 | 137k | // Record small dead def live-ranges for all the subregisters |
1291 | 137k | // of the destination register. |
1292 | 137k | // Otherwise, variables that live through may miss some |
1293 | 137k | // interferences, thus creating invalid allocation. |
1294 | 137k | // E.g., i386 code: |
1295 | 137k | // vreg1 = somedef ; vreg1 GR8 |
1296 | 137k | // vreg2 = remat ; vreg2 GR32 |
1297 | 137k | // CL = COPY vreg2.sub_8bit |
1298 | 137k | // = somedef vreg1 ; vreg1 GR8 |
1299 | 137k | // => |
1300 | 137k | // vreg1 = somedef ; vreg1 GR8 |
1301 | 137k | // ECX<def, dead> = remat ; CL<imp-def> |
1302 | 137k | // = somedef vreg1 ; vreg1 GR8 |
1303 | 137k | // vreg1 will see the inteferences with CL but not with CH since |
1304 | 137k | // no live-ranges would have been created for ECX. |
1305 | 137k | // Fix that! |
1306 | 137k | SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); |
1307 | 137k | for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI); |
1308 | 278k | Units.isValid()278k ; ++Units140k ) |
1309 | 140k | if (LiveRange *140k LR140k = LIS->getCachedRegUnit(*Units)) |
1310 | 103k | LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); |
1311 | 690k | } |
1312 | 724k | |
1313 | 724k | if (NewMI.getOperand(0).getSubReg()) |
1314 | 14.5k | NewMI.getOperand(0).setIsUndef(); |
1315 | 724k | |
1316 | 724k | // Transfer over implicit operands to the rematerialized instruction. |
1317 | 724k | for (MachineOperand &MO : ImplicitOps) |
1318 | 1 | NewMI.addOperand(MO); |
1319 | 724k | |
1320 | 724k | SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); |
1321 | 732k | for (unsigned i = 0, e = NewMIImplDefs.size(); i != e732k ; ++i8.08k ) { |
1322 | 8.08k | unsigned Reg = NewMIImplDefs[i]; |
1323 | 16.1k | for (MCRegUnitIterator Units(Reg, TRI); Units.isValid()16.1k ; ++Units8.08k ) |
1324 | 8.08k | if (LiveRange *8.08k LR8.08k = LIS->getCachedRegUnit(*Units)) |
1325 | 0 | LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); |
1326 | 8.08k | } |
1327 | 724k | |
1328 | 724k | DEBUG(dbgs() << "Remat: " << NewMI); |
1329 | 724k | ++NumReMats; |
1330 | 724k | |
1331 | 724k | // The source interval can become smaller because we removed a use. |
1332 | 724k | shrinkToUses(&SrcInt, &DeadDefs); |
1333 | 724k | if (!DeadDefs.empty()724k ) { |
1334 | 489k | // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs |
1335 | 489k | // to describe DstReg instead. |
1336 | 5 | for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { |
1337 | 5 | MachineInstr *UseMI = UseMO.getParent(); |
1338 | 5 | if (UseMI->isDebugValue()5 ) { |
1339 | 5 | UseMO.setReg(DstReg); |
1340 | 5 | DEBUG(dbgs() << "\t\tupdated: " << *UseMI); |
1341 | 5 | } |
1342 | 5 | } |
1343 | 489k | eliminateDeadDefs(); |
1344 | 489k | } |
1345 | 724k | |
1346 | 724k | return true; |
1347 | 11.1M | } |
1348 | | |
1349 | 6.24M | bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { |
1350 | 6.24M | // ProcessImpicitDefs may leave some copies of <undef> values, it only removes |
1351 | 6.24M | // local variables. When we have a copy like: |
1352 | 6.24M | // |
1353 | 6.24M | // %vreg1 = COPY %vreg2<undef> |
1354 | 6.24M | // |
1355 | 6.24M | // We delete the copy and remove the corresponding value number from %vreg1. |
1356 | 6.24M | // Any uses of that value number are marked as <undef>. |
1357 | 6.24M | |
1358 | 6.24M | // Note that we do not query CoalescerPair here but redo isMoveInstr as the |
1359 | 6.24M | // CoalescerPair may have a new register class with adjusted subreg indices |
1360 | 6.24M | // at this point. |
1361 | 6.24M | unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; |
1362 | 6.24M | isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); |
1363 | 6.24M | |
1364 | 6.24M | SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); |
1365 | 6.24M | const LiveInterval &SrcLI = LIS->getInterval(SrcReg); |
1366 | 6.24M | // CopyMI is undef iff SrcReg is not live before the instruction. |
1367 | 6.24M | if (SrcSubIdx != 0 && 6.24M SrcLI.hasSubRanges()534k ) { |
1368 | 72.6k | LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); |
1369 | 169k | for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { |
1370 | 169k | if ((SR.LaneMask & SrcMask).none()) |
1371 | 97.1k | continue; |
1372 | 72.6k | if (72.6k SR.liveAt(Idx)72.6k ) |
1373 | 72.6k | return false; |
1374 | 6.24M | } |
1375 | 6.17M | } else if (6.17M SrcLI.liveAt(Idx)6.17M ) |
1376 | 6.17M | return false; |
1377 | 1 | |
1378 | 1 | DEBUG1 (dbgs() << "\tEliminating copy of <undef> value\n"); |
1379 | 1 | |
1380 | 1 | // Remove any DstReg segments starting at the instruction. |
1381 | 1 | LiveInterval &DstLI = LIS->getInterval(DstReg); |
1382 | 1 | SlotIndex RegIndex = Idx.getRegSlot(); |
1383 | 1 | // Remove value or merge with previous one in case of a subregister def. |
1384 | 1 | if (VNInfo *PrevVNI1 = DstLI.getVNInfoAt(Idx)) { |
1385 | 0 | VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); |
1386 | 0 | DstLI.MergeValueNumberInto(VNI, PrevVNI); |
1387 | 0 |
|
1388 | 0 | // The affected subregister segments can be removed. |
1389 | 0 | LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); |
1390 | 0 | for (LiveInterval::SubRange &SR : DstLI.subranges()) { |
1391 | 0 | if ((SR.LaneMask & DstMask).none()) |
1392 | 0 | continue; |
1393 | 0 |
|
1394 | 0 | VNInfo *SVNI = SR.getVNInfoAt(RegIndex); |
1395 | 0 | assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); |
1396 | 0 | SR.removeValNo(SVNI); |
1397 | 0 | } |
1398 | 0 | DstLI.removeEmptySubRanges(); |
1399 | 0 | } else |
1400 | 1 | LIS->removeVRegDefAt(DstLI, RegIndex); |
1401 | 1 | |
1402 | 1 | // Mark uses as undef. |
1403 | 2 | for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { |
1404 | 2 | if (MO.isDef() /*|| MO.isUndef()*/) |
1405 | 1 | continue; |
1406 | 1 | const MachineInstr &MI = *MO.getParent(); |
1407 | 1 | SlotIndex UseIdx = LIS->getInstructionIndex(MI); |
1408 | 1 | LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); |
1409 | 1 | bool isLive; |
1410 | 1 | if (!UseMask.all() && 1 DstLI.hasSubRanges()0 ) { |
1411 | 0 | isLive = false; |
1412 | 0 | for (const LiveInterval::SubRange &SR : DstLI.subranges()) { |
1413 | 0 | if ((SR.LaneMask & UseMask).none()) |
1414 | 0 | continue; |
1415 | 0 | if (0 SR.liveAt(UseIdx)0 ) { |
1416 | 0 | isLive = true; |
1417 | 0 | break; |
1418 | 0 | } |
1419 | 1 | } |
1420 | 0 | } else |
1421 | 1 | isLive = DstLI.liveAt(UseIdx); |
1422 | 1 | if (isLive) |
1423 | 0 | continue; |
1424 | 1 | MO.setIsUndef(true); |
1425 | 1 | DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); |
1426 | 2 | } |
1427 | 1 | |
1428 | 1 | // A def of a subregister may be a use of the other subregisters, so |
1429 | 1 | // deleting a def of a subregister may also remove uses. Since CopyMI |
1430 | 1 | // is still part of the function (but about to be erased), mark all |
1431 | 1 | // defs of DstReg in it as <undef>, so that shrinkToUses would |
1432 | 1 | // ignore them. |
1433 | 1 | for (MachineOperand &MO : CopyMI->operands()) |
1434 | 2 | if (2 MO.isReg() && 2 MO.isDef()2 && MO.getReg() == DstReg1 ) |
1435 | 1 | MO.setIsUndef(true); |
1436 | 6.24M | LIS->shrinkToUses(&DstLI); |
1437 | 6.24M | |
1438 | 6.24M | return true; |
1439 | 6.24M | } |
1440 | | |
1441 | | void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, |
1442 | 355k | MachineOperand &MO, unsigned SubRegIdx) { |
1443 | 355k | LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx); |
1444 | 355k | if (MO.isDef()) |
1445 | 137k | Mask = ~Mask; |
1446 | 355k | bool IsUndef = true; |
1447 | 896k | for (const LiveInterval::SubRange &S : Int.subranges()) { |
1448 | 896k | if ((S.LaneMask & Mask).none()) |
1449 | 523k | continue; |
1450 | 372k | if (372k S.liveAt(UseIdx)372k ) { |
1451 | 355k | IsUndef = false; |
1452 | 355k | break; |
1453 | 355k | } |
1454 | 355k | } |
1455 | 355k | if (IsUndef355k ) { |
1456 | 35 | MO.setIsUndef(true); |
1457 | 35 | // We found out some subregister use is actually reading an undefined |
1458 | 35 | // value. In some cases the whole vreg has become undefined at this |
1459 | 35 | // point so we have to potentially shrink the main range if the |
1460 | 35 | // use was ending a live segment there. |
1461 | 35 | LiveQueryResult Q = Int.Query(UseIdx); |
1462 | 35 | if (Q.valueOut() == nullptr) |
1463 | 0 | ShrinkMainRange = true; |
1464 | 35 | } |
1465 | 355k | } |
1466 | | |
1467 | | void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, |
1468 | | unsigned DstReg, |
1469 | 6.85M | unsigned SubIdx) { |
1470 | 6.85M | bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); |
1471 | 6.85M | LiveInterval *DstInt = DstIsPhys ? nullptr1.02M : &LIS->getInterval(DstReg)5.82M ; |
1472 | 6.85M | |
1473 | 6.85M | if (DstInt && 6.85M DstInt->hasSubRanges()5.82M && DstReg != SrcReg127k ) { |
1474 | 476k | for (MachineOperand &MO : MRI->reg_operands(DstReg)) { |
1475 | 476k | unsigned SubReg = MO.getSubReg(); |
1476 | 476k | if (SubReg == 0 || 476k MO.isUndef()318k ) |
1477 | 192k | continue; |
1478 | 284k | MachineInstr &MI = *MO.getParent(); |
1479 | 284k | if (MI.isDebugValue()) |
1480 | 0 | continue; |
1481 | 284k | SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true); |
1482 | 284k | addUndefFlag(*DstInt, UseIdx, MO, SubReg); |
1483 | 284k | } |
1484 | 127k | } |
1485 | 6.85M | |
1486 | 6.85M | SmallPtrSet<MachineInstr*, 8> Visited; |
1487 | 6.85M | for (MachineRegisterInfo::reg_instr_iterator |
1488 | 6.85M | I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); |
1489 | 18.4M | I != E18.4M ; ) { |
1490 | 11.5M | MachineInstr *UseMI = &*(I++); |
1491 | 11.5M | |
1492 | 11.5M | // Each instruction can only be rewritten once because sub-register |
1493 | 11.5M | // composition is not always idempotent. When SrcReg != DstReg, rewriting |
1494 | 11.5M | // the UseMI operands removes them from the SrcReg use-def chain, but when |
1495 | 11.5M | // SrcReg is DstReg we could encounter UseMI twice if it has multiple |
1496 | 11.5M | // operands mentioning the virtual register. |
1497 | 11.5M | if (SrcReg == DstReg && 11.5M !Visited.insert(UseMI).second212k ) |
1498 | 8.84k | continue; |
1499 | 11.5M | |
1500 | 11.5M | SmallVector<unsigned,8> Ops; |
1501 | 11.5M | bool Reads, Writes; |
1502 | 11.5M | std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); |
1503 | 11.5M | |
1504 | 11.5M | // If SrcReg wasn't read, it may still be the case that DstReg is live-in |
1505 | 11.5M | // because SrcReg is a sub-register. |
1506 | 11.5M | if (DstInt && 11.5M !Reads10.3M && SubIdx4.40M && !UseMI->isDebugValue()827k ) |
1507 | 827k | Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); |
1508 | 11.5M | |
1509 | 11.5M | // Replace SrcReg with DstReg in all UseMI operands. |
1510 | 23.3M | for (unsigned i = 0, e = Ops.size(); i != e23.3M ; ++i11.8M ) { |
1511 | 11.8M | MachineOperand &MO = UseMI->getOperand(Ops[i]); |
1512 | 11.8M | |
1513 | 11.8M | // Adjust <undef> flags in case of sub-register joins. We don't want to |
1514 | 11.8M | // turn a full def into a read-modify-write sub-register def and vice |
1515 | 11.8M | // versa. |
1516 | 11.8M | if (SubIdx && 11.8M MO.isDef()1.74M ) |
1517 | 849k | MO.setIsUndef(!Reads); |
1518 | 11.8M | |
1519 | 11.8M | // A subreg use of a partially undef (super) register may be a complete |
1520 | 11.8M | // undef use now and then has to be marked that way. |
1521 | 11.8M | if (SubIdx != 0 && 11.8M MO.isUse()1.74M && MRI->shouldTrackSubRegLiveness(DstReg)893k ) { |
1522 | 70.3k | if (!DstInt->hasSubRanges()70.3k ) { |
1523 | 0 | BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); |
1524 | 0 | LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); |
1525 | 0 | DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); |
1526 | 0 | } |
1527 | 70.3k | SlotIndex MIIdx = UseMI->isDebugValue() |
1528 | 2 | ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) |
1529 | 70.3k | : LIS->getInstructionIndex(*UseMI); |
1530 | 70.3k | SlotIndex UseIdx = MIIdx.getRegSlot(true); |
1531 | 70.3k | addUndefFlag(*DstInt, UseIdx, MO, SubIdx); |
1532 | 70.3k | } |
1533 | 11.8M | |
1534 | 11.8M | if (DstIsPhys) |
1535 | 1.20M | MO.substPhysReg(DstReg, *TRI); |
1536 | 11.8M | else |
1537 | 10.5M | MO.substVirtReg(DstReg, SubIdx, *TRI); |
1538 | 11.8M | } |
1539 | 11.5M | |
1540 | 11.5M | DEBUG({ |
1541 | 11.5M | dbgs() << "\t\tupdated: "; |
1542 | 11.5M | if (!UseMI->isDebugValue()) |
1543 | 11.5M | dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; |
1544 | 11.5M | dbgs() << *UseMI; |
1545 | 11.5M | }); |
1546 | 11.5M | } |
1547 | 6.85M | } |
1548 | | |
1549 | 11.7M | bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { |
1550 | 11.7M | // Always join simple intervals that are defined by a single copy from a |
1551 | 11.7M | // reserved register. This doesn't increase register pressure, so it is |
1552 | 11.7M | // always beneficial. |
1553 | 11.7M | if (!MRI->isReserved(CP.getDstReg())11.7M ) { |
1554 | 10.5M | DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); |
1555 | 10.5M | return false; |
1556 | 10.5M | } |
1557 | 1.14M | |
1558 | 1.14M | LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); |
1559 | 1.14M | if (JoinVInt.containsOneValue()) |
1560 | 1.03M | return true; |
1561 | 117k | |
1562 | 117k | DEBUG117k (dbgs() << "\tCannot join complex intervals into reserved register.\n"); |
1563 | 117k | return false; |
1564 | 117k | } |
1565 | | |
1566 | 18.5M | bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { |
1567 | 18.5M | Again = false; |
1568 | 18.5M | DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); |
1569 | 18.5M | |
1570 | 18.5M | CoalescerPair CP(*TRI); |
1571 | 18.5M | if (!CP.setRegisters(CopyMI)18.5M ) { |
1572 | 524k | DEBUG(dbgs() << "\tNot coalescable.\n"); |
1573 | 524k | return false; |
1574 | 524k | } |
1575 | 18.0M | |
1576 | 18.0M | if (18.0M CP.getNewRC()18.0M ) { |
1577 | 6.25M | auto SrcRC = MRI->getRegClass(CP.getSrcReg()); |
1578 | 6.25M | auto DstRC = MRI->getRegClass(CP.getDstReg()); |
1579 | 6.25M | unsigned SrcIdx = CP.getSrcIdx(); |
1580 | 6.25M | unsigned DstIdx = CP.getDstIdx(); |
1581 | 6.25M | if (CP.isFlipped()6.25M ) { |
1582 | 434k | std::swap(SrcIdx, DstIdx); |
1583 | 434k | std::swap(SrcRC, DstRC); |
1584 | 434k | } |
1585 | 6.25M | if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, |
1586 | 6.25M | CP.getNewRC())) { |
1587 | 920 | DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); |
1588 | 920 | return false; |
1589 | 920 | } |
1590 | 18.0M | } |
1591 | 18.0M | |
1592 | 18.0M | // Dead code elimination. This really should be handled by MachineDCE, but |
1593 | 18.0M | // sometimes dead copies slip through, and we can't generate invalid live |
1594 | 18.0M | // ranges. |
1595 | 18.0M | if (18.0M !CP.isPhys() && 18.0M CopyMI->allDefsAreDead()6.25M ) { |
1596 | 5.30k | DEBUG(dbgs() << "\tCopy is dead.\n"); |
1597 | 5.30k | DeadDefs.push_back(CopyMI); |
1598 | 5.30k | eliminateDeadDefs(); |
1599 | 5.30k | return true; |
1600 | 5.30k | } |
1601 | 17.9M | |
1602 | 17.9M | // Eliminate undefs. |
1603 | 17.9M | if (17.9M !CP.isPhys() && 17.9M eliminateUndefCopy(CopyMI)6.24M ) { |
1604 | 1 | deleteInstr(CopyMI); |
1605 | 1 | return false; // Not coalescable. |
1606 | 1 | } |
1607 | 17.9M | |
1608 | 17.9M | // Coalesced copies are normally removed immediately, but transformations |
1609 | 17.9M | // like removeCopyByCommutingDef() can inadvertently create identity copies. |
1610 | 17.9M | // When that happens, just join the values and remove the copy. |
1611 | 17.9M | if (17.9M CP.getSrcReg() == CP.getDstReg()17.9M ) { |
1612 | 0 | LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); |
1613 | 0 | DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); |
1614 | 0 | const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); |
1615 | 0 | LiveQueryResult LRQ = LI.Query(CopyIdx); |
1616 | 0 | if (VNInfo *DefVNI0 = LRQ.valueDefined()) { |
1617 | 0 | VNInfo *ReadVNI = LRQ.valueIn(); |
1618 | 0 | assert(ReadVNI && "No value before copy and no <undef> flag."); |
1619 | 0 | assert(ReadVNI != DefVNI && "Cannot read and define the same value."); |
1620 | 0 | LI.MergeValueNumberInto(DefVNI, ReadVNI); |
1621 | 0 |
|
1622 | 0 | // Process subregister liveranges. |
1623 | 0 | for (LiveInterval::SubRange &S : LI.subranges()) { |
1624 | 0 | LiveQueryResult SLRQ = S.Query(CopyIdx); |
1625 | 0 | if (VNInfo *SDefVNI0 = SLRQ.valueDefined()) { |
1626 | 0 | VNInfo *SReadVNI = SLRQ.valueIn(); |
1627 | 0 | S.MergeValueNumberInto(SDefVNI, SReadVNI); |
1628 | 0 | } |
1629 | 0 | } |
1630 | 0 | DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); |
1631 | 0 | } |
1632 | 0 | deleteInstr(CopyMI); |
1633 | 0 | return true; |
1634 | 0 | } |
1635 | 17.9M | |
1636 | 17.9M | // Enforce policies. |
1637 | 17.9M | if (17.9M CP.isPhys()17.9M ) { |
1638 | 11.7M | DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) |
1639 | 11.7M | << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) |
1640 | 11.7M | << '\n'); |
1641 | 11.7M | if (!canJoinPhys(CP)11.7M ) { |
1642 | 10.7M | // Before giving up coalescing, if definition of source is defined by |
1643 | 10.7M | // trivial computation, try rematerializing it. |
1644 | 10.7M | bool IsDefCopy; |
1645 | 10.7M | if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) |
1646 | 690k | return true; |
1647 | 10.0M | if (10.0M IsDefCopy10.0M ) |
1648 | 4.72M | Again = true; // May be possible to coalesce later. |
1649 | 10.7M | return false; |
1650 | 10.7M | } |
1651 | 0 | } else { |
1652 | 6.24M | // When possible, let DstReg be the larger interval. |
1653 | 6.24M | if (!CP.isPartial() && 6.24M LIS->getInterval(CP.getSrcReg()).size() > |
1654 | 4.92M | LIS->getInterval(CP.getDstReg()).size()) |
1655 | 1.49M | CP.flip(); |
1656 | 6.24M | |
1657 | 6.24M | DEBUG({ |
1658 | 6.24M | dbgs() << "\tConsidering merging to " |
1659 | 6.24M | << TRI->getRegClassName(CP.getNewRC()) << " with "; |
1660 | 6.24M | if (CP.getDstIdx() && CP.getSrcIdx()) |
1661 | 6.24M | dbgs() << PrintReg(CP.getDstReg()) << " in " |
1662 | 6.24M | << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " |
1663 | 6.24M | << PrintReg(CP.getSrcReg()) << " in " |
1664 | 6.24M | << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; |
1665 | 6.24M | else |
1666 | 6.24M | dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " |
1667 | 6.24M | << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; |
1668 | 6.24M | }); |
1669 | 6.24M | } |
1670 | 17.9M | |
1671 | 7.28M | ShrinkMask = LaneBitmask::getNone(); |
1672 | 7.28M | ShrinkMainRange = false; |
1673 | 7.28M | |
1674 | 7.28M | // Okay, attempt to join these two intervals. On failure, this returns false. |
1675 | 7.28M | // Otherwise, if one of the intervals being joined is a physreg, this method |
1676 | 7.28M | // always canonicalizes DstInt to be it. The output "SrcInt" will not have |
1677 | 7.28M | // been modified, so we can use this information below to update aliases. |
1678 | 7.28M | if (!joinIntervals(CP)7.28M ) { |
1679 | 459k | // Coalescing failed. |
1680 | 459k | |
1681 | 459k | // If definition of source is defined by trivial computation, try |
1682 | 459k | // rematerializing it. |
1683 | 459k | bool IsDefCopy; |
1684 | 459k | if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) |
1685 | 33.6k | return true; |
1686 | 426k | |
1687 | 426k | // If we can eliminate the copy without merging the live segments, do so |
1688 | 426k | // now. |
1689 | 426k | if (426k !CP.isPartial() && 426k !CP.isPhys()392k ) { |
1690 | 390k | if (adjustCopiesBackFrom(CP, CopyMI) || |
1691 | 390k | removeCopyByCommutingDef(CP, CopyMI)390k ) { |
1692 | 429 | deleteInstr(CopyMI); |
1693 | 429 | DEBUG(dbgs() << "\tTrivial!\n"); |
1694 | 429 | return true; |
1695 | 429 | } |
1696 | 425k | } |
1697 | 425k | |
1698 | 425k | // Try and see if we can partially eliminate the copy by moving the copy to |
1699 | 425k | // its predecessor. |
1700 | 425k | if (425k !CP.isPartial() && 425k !CP.isPhys()391k ) |
1701 | 390k | if (390k removePartialRedundancy(CP, *CopyMI)390k ) |
1702 | 487 | return true; |
1703 | 425k | |
1704 | 425k | // Otherwise, we are unable to join the intervals. |
1705 | 425k | DEBUG425k (dbgs() << "\tInterference!\n"); |
1706 | 425k | Again = true; // May be possible to coalesce later. |
1707 | 425k | return false; |
1708 | 425k | } |
1709 | 6.82M | |
1710 | 6.82M | // Coalescing to a virtual register that is of a sub-register class of the |
1711 | 6.82M | // other. Make sure the resulting register is set to the right register class. |
1712 | 6.82M | if (6.82M CP.isCrossClass()6.82M ) { |
1713 | 3.45M | ++numCrossRCs; |
1714 | 3.45M | MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); |
1715 | 3.45M | } |
1716 | 6.82M | |
1717 | 6.82M | // Removing sub-register copies can ease the register class constraints. |
1718 | 6.82M | // Make sure we attempt to inflate the register class of DstReg. |
1719 | 6.82M | if (!CP.isPhys() && 6.82M RegClassInfo.isProperSubClass(CP.getNewRC())5.79M ) |
1720 | 15.0k | InflateRegs.push_back(CP.getDstReg()); |
1721 | 6.82M | |
1722 | 6.82M | // CopyMI has been erased by joinIntervals at this point. Remove it from |
1723 | 6.82M | // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back |
1724 | 6.82M | // to the work list. This keeps ErasedInstrs from growing needlessly. |
1725 | 6.82M | ErasedInstrs.erase(CopyMI); |
1726 | 6.82M | |
1727 | 6.82M | // Rewrite all SrcReg operands to DstReg. |
1728 | 6.82M | // Also update DstReg operands to include DstIdx if it is set. |
1729 | 6.82M | if (CP.getDstIdx()) |
1730 | 291 | updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); |
1731 | 6.82M | updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); |
1732 | 6.82M | |
1733 | 6.82M | // Shrink subregister ranges if necessary. |
1734 | 6.82M | if (ShrinkMask.any()6.82M ) { |
1735 | 28 | LiveInterval &LI = LIS->getInterval(CP.getDstReg()); |
1736 | 74 | for (LiveInterval::SubRange &S : LI.subranges()) { |
1737 | 74 | if ((S.LaneMask & ShrinkMask).none()) |
1738 | 34 | continue; |
1739 | 40 | DEBUG40 (dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) |
1740 | 40 | << ")\n"); |
1741 | 40 | LIS->shrinkToUses(S, LI.reg); |
1742 | 40 | } |
1743 | 28 | LI.removeEmptySubRanges(); |
1744 | 28 | } |
1745 | 6.82M | if (ShrinkMainRange6.82M ) { |
1746 | 0 | LiveInterval &LI = LIS->getInterval(CP.getDstReg()); |
1747 | 0 | shrinkToUses(&LI); |
1748 | 0 | } |
1749 | 6.82M | |
1750 | 6.82M | // SrcReg is guaranteed to be the register whose live interval that is |
1751 | 6.82M | // being merged. |
1752 | 6.82M | LIS->removeInterval(CP.getSrcReg()); |
1753 | 6.82M | |
1754 | 6.82M | // Update regalloc hint. |
1755 | 6.82M | TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); |
1756 | 6.82M | |
1757 | 6.82M | DEBUG({ |
1758 | 18.5M | dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) |
1759 | 18.5M | << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; |
1760 | 18.5M | dbgs() << "\tResult = "; |
1761 | 18.5M | if (CP.isPhys()) |
1762 | 18.5M | dbgs() << PrintReg(CP.getDstReg(), TRI); |
1763 | 18.5M | else |
1764 | 18.5M | dbgs() << LIS->getInterval(CP.getDstReg()); |
1765 | 18.5M | dbgs() << '\n'; |
1766 | 18.5M | }); |
1767 | 18.5M | |
1768 | 18.5M | ++numJoins; |
1769 | 18.5M | return true; |
1770 | 18.5M | } |
1771 | | |
1772 | 1.03M | bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { |
1773 | 1.03M | unsigned DstReg = CP.getDstReg(); |
1774 | 1.03M | unsigned SrcReg = CP.getSrcReg(); |
1775 | 1.03M | assert(CP.isPhys() && "Must be a physreg copy"); |
1776 | 1.03M | assert(MRI->isReserved(DstReg) && "Not a reserved register"); |
1777 | 1.03M | LiveInterval &RHS = LIS->getInterval(SrcReg); |
1778 | 1.03M | DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); |
1779 | 1.03M | |
1780 | 1.03M | assert(RHS.containsOneValue() && "Invalid join with reserved register"); |
1781 | 1.03M | |
1782 | 1.03M | // Optimization for reserved registers like ESP. We can only merge with a |
1783 | 1.03M | // reserved physreg if RHS has a single value that is a copy of DstReg. |
1784 | 1.03M | // The live range of the reserved register will look like a set of dead defs |
1785 | 1.03M | // - we don't properly track the live range of reserved registers. |
1786 | 1.03M | |
1787 | 1.03M | // Deny any overlapping intervals. This depends on all the reserved |
1788 | 1.03M | // register live ranges to look like dead defs. |
1789 | 1.03M | if (!MRI->isConstantPhysReg(DstReg)1.03M ) { |
1790 | 141k | for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid()141k ; ++UI69.9k ) { |
1791 | 71.2k | // Abort if not all the regunits are reserved. |
1792 | 142k | for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid()142k ; ++RI71.2k ) { |
1793 | 71.2k | if (!MRI->isReserved(*RI)) |
1794 | 22 | return false; |
1795 | 71.2k | } |
1796 | 71.2k | if (71.2k RHS.overlaps(LIS->getRegUnit(*UI))71.2k ) { |
1797 | 1.23k | DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); |
1798 | 1.23k | return false; |
1799 | 1.23k | } |
1800 | 71.2k | } |
1801 | 71.2k | |
1802 | 71.2k | // We must also check for overlaps with regmask clobbers. |
1803 | 69.9k | BitVector RegMaskUsable; |
1804 | 69.9k | if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) && |
1805 | 69.9k | !RegMaskUsable.test(DstReg)352 ) { |
1806 | 348 | DEBUG(dbgs() << "\t\tRegMask interference\n"); |
1807 | 348 | return false; |
1808 | 348 | } |
1809 | 1.02M | } |
1810 | 1.02M | |
1811 | 1.02M | // Skip any value computations, we are not adding new values to the |
1812 | 1.02M | // reserved register. Also skip merging the live ranges, the reserved |
1813 | 1.02M | // register live range doesn't need to be accurate as long as all the |
1814 | 1.02M | // defs are there. |
1815 | 1.02M | |
1816 | 1.02M | // Delete the identity copy. |
1817 | 1.02M | MachineInstr *CopyMI; |
1818 | 1.02M | if (CP.isFlipped()1.02M ) { |
1819 | 1.02M | // Physreg is copied into vreg |
1820 | 1.02M | // %vregY = COPY %X |
1821 | 1.02M | // ... //< no other def of %X here |
1822 | 1.02M | // use %vregY |
1823 | 1.02M | // => |
1824 | 1.02M | // ... |
1825 | 1.02M | // use %X |
1826 | 1.02M | CopyMI = MRI->getVRegDef(SrcReg); |
1827 | 1.02M | } else { |
1828 | 55 | // VReg is copied into physreg: |
1829 | 55 | // %vregX = def |
1830 | 55 | // ... //< no other def or use of %Y here |
1831 | 55 | // %Y = COPY %vregX |
1832 | 55 | // => |
1833 | 55 | // %Y = def |
1834 | 55 | // ... |
1835 | 55 | if (!MRI->hasOneNonDBGUse(SrcReg)55 ) { |
1836 | 8 | DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); |
1837 | 8 | return false; |
1838 | 8 | } |
1839 | 47 | |
1840 | 47 | if (47 !LIS->intervalIsInOneMBB(RHS)47 ) { |
1841 | 9 | DEBUG(dbgs() << "\t\tComplex control flow!\n"); |
1842 | 9 | return false; |
1843 | 9 | } |
1844 | 38 | |
1845 | 38 | MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); |
1846 | 38 | CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); |
1847 | 38 | SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); |
1848 | 38 | SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); |
1849 | 38 | |
1850 | 38 | if (!MRI->isConstantPhysReg(DstReg)38 ) { |
1851 | 37 | // We checked above that there are no interfering defs of the physical |
1852 | 37 | // register. However, for this case, where we intend to move up the def of |
1853 | 37 | // the physical register, we also need to check for interfering uses. |
1854 | 37 | SlotIndexes *Indexes = LIS->getSlotIndexes(); |
1855 | 37 | for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); |
1856 | 69 | SI != CopyRegIdx69 ; SI = Indexes->getNextNonNullIndex(SI)32 ) { |
1857 | 36 | MachineInstr *MI = LIS->getInstructionFromIndex(SI); |
1858 | 36 | if (MI->readsRegister(DstReg, TRI)36 ) { |
1859 | 4 | DEBUG(dbgs() << "\t\tInterference (read): " << *MI); |
1860 | 4 | return false; |
1861 | 4 | } |
1862 | 36 | } |
1863 | 37 | } |
1864 | 38 | |
1865 | 38 | // We're going to remove the copy which defines a physical reserved |
1866 | 38 | // register, so remove its valno, etc. |
1867 | 34 | DEBUG34 (dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) |
1868 | 34 | << " at " << CopyRegIdx << "\n"); |
1869 | 34 | |
1870 | 34 | LIS->removePhysRegDefAt(DstReg, CopyRegIdx); |
1871 | 34 | // Create a new dead def at the new def location. |
1872 | 68 | for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid()68 ; ++UI34 ) { |
1873 | 34 | LiveRange &LR = LIS->getRegUnit(*UI); |
1874 | 34 | LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); |
1875 | 34 | } |
1876 | 55 | } |
1877 | 1.02M | |
1878 | 1.02M | deleteInstr(CopyMI); |
1879 | 1.02M | |
1880 | 1.02M | // We don't track kills for reserved registers. |
1881 | 1.02M | MRI->clearKillFlags(CP.getSrcReg()); |
1882 | 1.02M | |
1883 | 1.02M | return true; |
1884 | 1.03M | } |
1885 | | |
1886 | | //===----------------------------------------------------------------------===// |
1887 | | // Interference checking and interval joining |
1888 | | //===----------------------------------------------------------------------===// |
1889 | | // |
1890 | | // In the easiest case, the two live ranges being joined are disjoint, and |
1891 | | // there is no interference to consider. It is quite common, though, to have |
1892 | | // overlapping live ranges, and we need to check if the interference can be |
1893 | | // resolved. |
1894 | | // |
1895 | | // The live range of a single SSA value forms a sub-tree of the dominator tree. |
1896 | | // This means that two SSA values overlap if and only if the def of one value |
1897 | | // is contained in the live range of the other value. As a special case, the |
1898 | | // overlapping values can be defined at the same index. |
1899 | | // |
1900 | | // The interference from an overlapping def can be resolved in these cases: |
1901 | | // |
1902 | | // 1. Coalescable copies. The value is defined by a copy that would become an |
1903 | | // identity copy after joining SrcReg and DstReg. The copy instruction will |
1904 | | // be removed, and the value will be merged with the source value. |
1905 | | // |
1906 | | // There can be several copies back and forth, causing many values to be |
1907 | | // merged into one. We compute a list of ultimate values in the joined live |
1908 | | // range as well as a mappings from the old value numbers. |
1909 | | // |
1910 | | // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI |
1911 | | // predecessors have a live out value. It doesn't cause real interference, |
1912 | | // and can be merged into the value it overlaps. Like a coalescable copy, it |
1913 | | // can be erased after joining. |
1914 | | // |
1915 | | // 3. Copy of external value. The overlapping def may be a copy of a value that |
1916 | | // is already in the other register. This is like a coalescable copy, but |
1917 | | // the live range of the source register must be trimmed after erasing the |
1918 | | // copy instruction: |
1919 | | // |
1920 | | // %src = COPY %ext |
1921 | | // %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. |
1922 | | // |
1923 | | // 4. Clobbering undefined lanes. Vector registers are sometimes built by |
1924 | | // defining one lane at a time: |
1925 | | // |
1926 | | // %dst:ssub0<def,read-undef> = FOO |
1927 | | // %src = BAR |
1928 | | // %dst:ssub1<def> = COPY %src |
1929 | | // |
1930 | | // The live range of %src overlaps the %dst value defined by FOO, but |
1931 | | // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane |
1932 | | // which was undef anyway. |
1933 | | // |
1934 | | // The value mapping is more complicated in this case. The final live range |
1935 | | // will have different value numbers for both FOO and BAR, but there is no |
1936 | | // simple mapping from old to new values. It may even be necessary to add |
1937 | | // new PHI values. |
1938 | | // |
1939 | | // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that |
1940 | | // is live, but never read. This can happen because we don't compute |
1941 | | // individual live ranges per lane. |
1942 | | // |
1943 | | // %dst<def> = FOO |
1944 | | // %src = BAR |
1945 | | // %dst:ssub1<def> = COPY %src |
1946 | | // |
1947 | | // This kind of interference is only resolved locally. If the clobbered |
1948 | | // lane value escapes the block, the join is aborted. |
1949 | | |
namespace {

/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
/// pair)
class JoinVals {
  /// Live range we work on.
  LiveRange &LR;

  /// (Main) register we work on.
  const unsigned Reg;

  /// Reg (and therefore the values in this liverange) will end up as
  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
  /// CP.SrcIdx.
  const unsigned SubIdx;

  /// The LaneMask that this liverange will occupy the coalesced register. May
  /// be smaller than the lanemask produced by SubIdx when merging subranges.
  const LaneBitmask LaneMask;

  /// This is true when joining sub register ranges, false when joining main
  /// ranges.
  const bool SubRangeJoin;

  /// Whether the current LiveInterval tracks subregister liveness.
  const bool TrackSubRegLiveness;

  /// Values that will be present in the final live range.
  SmallVectorImpl<VNInfo*> &NewVNInfo;

  const CoalescerPair &CP;
  LiveIntervals *LIS;
  SlotIndexes *Indexes;
  const TargetRegisterInfo *TRI;

  /// Value number assignments. Maps value numbers in LI to entries in
  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
  /// Initialized to -1 (unassigned) for every value number.
  SmallVector<int, 8> Assignments;

  /// Conflict resolution for overlapping values.
  enum ConflictResolution {
    /// No overlap, simply keep this value.
    CR_Keep,

    /// Merge this value into OtherVNI and erase the defining instruction.
    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
    /// values.
    CR_Erase,

    /// Merge this value into OtherVNI but keep the defining instruction.
    /// This is for the special case where OtherVNI is defined by the same
    /// instruction.
    CR_Merge,

    /// Keep this value, and have it replace OtherVNI where possible. This
    /// complicates value mapping since OtherVNI maps to two different values
    /// before and after this def.
    /// Used when clobbering undefined or dead lanes.
    CR_Replace,

    /// Unresolved conflict. Visit later when all values have been mapped.
    CR_Unresolved,

    /// Unresolvable conflict. Abort the join.
    CR_Impossible
  };

  /// Per-value info for LI. The lane bit masks are all relative to the final
  /// joined register, so they can be compared directly between SrcReg and
  /// DstReg.
  struct Val {
    ConflictResolution Resolution = CR_Keep;

    /// Lanes written by this def, 0 for unanalyzed values.
    LaneBitmask WriteLanes;

    /// Lanes with defined values in this register. Other lanes are undef and
    /// safe to clobber.
    LaneBitmask ValidLanes;

    /// Value in LI being redefined by this def.
    VNInfo *RedefVNI = nullptr;

    /// Value in the other live range that overlaps this def, if any.
    VNInfo *OtherVNI = nullptr;

    /// Is this value an IMPLICIT_DEF that can be erased?
    ///
    /// IMPLICIT_DEF values should only exist at the end of a basic block that
    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
    /// safely erased if they are overlapping a live value in the other live
    /// interval.
    ///
    /// Weird control flow graphs and incomplete PHI handling in
    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
    /// normal values.
    bool ErasableImplicitDef = false;

    /// True when the live range of this value will be pruned because of an
    /// overlapping CR_Replace value in the other live range.
    bool Pruned = false;

    /// True once Pruned above has been computed.
    bool PrunedComputed = false;

    Val() = default;

    // WriteLanes is never empty once analyzeValue() has run on this value, so
    // a non-empty mask doubles as the "analyzed" flag.
    bool isAnalyzed() const { return WriteLanes.any(); }
  };

  /// One entry per value number in LI.
  SmallVector<Val, 8> Vals;

  /// Compute the bitmask of lanes actually written by DefMI.
  /// Set Redef if there are any partial register definitions that depend on the
  /// previous value of the register.
  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;

  /// Find the ultimate value that VNI was copied from.
  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;

  bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;

  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
  /// Return a conflict resolution when possible, but leave the hard cases as
  /// CR_Unresolved.
  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
  /// The recursion always goes upwards in the dominator tree, making loops
  /// impossible.
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);

  /// Compute the value assignment for ValNo in RI.
  /// This may be called recursively by analyzeValue(), but never for a ValNo on
  /// the stack.
  void computeAssignment(unsigned ValNo, JoinVals &Other);

  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
  /// the extent of the tainted lanes in the block.
  ///
  /// Multiple values in Other.LR can be affected since partial redefinitions
  /// can preserve previously tainted lanes.
  ///
  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
  ///
  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
  /// entry to TaintedVals.
  ///
  /// Returns false if the tainted lanes extend beyond the basic block.
  bool
  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);

  /// Return true if MI uses any of the given Lanes from Reg.
  /// This does not include partial redefinitions of Reg.
  bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;

  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
  /// be pruned:
  ///
  ///   %dst = COPY %src
  ///   %src = COPY %dst  <-- This value to be pruned.
  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);

public:
  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
           SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
           bool TrackSubRegLiveness)
    : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
      SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
      NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}

  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
  /// Returns false if any conflicts were impossible to resolve.
  bool mapValues(JoinVals &Other);

  /// Try to resolve conflicts that require all values to be mapped.
  /// Returns false if any conflicts were impossible to resolve.
  bool resolveConflicts(JoinVals &Other);

  /// Prune the live range of values in Other.LR where they would conflict with
  /// CR_Replace values in LR. Collect end points for restoring the live range
  /// after joining.
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
                   bool changeInstrs);

  /// Removes subranges starting at copies that get removed. This sometimes
  /// happens when undefined subranges are copied around. These ranges contain
  /// no useful information and can be removed.
  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);

  /// Pruning values in subranges can lead to removing segments in these
  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
  /// the main range also need to be removed. This function will mark
  /// the corresponding values in the main range as pruned, so that
  /// eraseInstrs can do the final cleanup.
  /// The parameter @p LI must be the interval whose main range is the
  /// live range LR.
  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);

  /// Erase any machine instructions that have been coalesced away.
  /// Add erased instructions to ErasedInstrs.
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
  /// the erased instrs.
  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                   SmallVectorImpl<unsigned> &ShrinkRegs,
                   LiveInterval *LI = nullptr);

  /// Remove liverange defs at places where implicit defs will be removed.
  void removeImplicitDefs();

  /// Get the value assignments suitable for passing to LiveInterval::join.
  const int *getAssignments() const { return Assignments.data(); }
};

} // end anonymous namespace
2175 | | |
2176 | | LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) |
2177 | 24.7M | const { |
2178 | 24.7M | LaneBitmask L; |
2179 | 59.7M | for (const MachineOperand &MO : DefMI->operands()) { |
2180 | 59.7M | if (!MO.isReg() || 59.7M MO.getReg() != Reg49.5M || !MO.isDef()26.2M ) |
2181 | 34.9M | continue; |
2182 | 24.7M | L |= TRI->getSubRegIndexLaneMask( |
2183 | 24.7M | TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); |
2184 | 24.7M | if (MO.readsReg()) |
2185 | 350k | Redef = true; |
2186 | 59.7M | } |
2187 | 24.7M | return L; |
2188 | 24.7M | } |
2189 | | |
/// Find the ultimate value that VNI was copied from, walking backwards through
/// full copies of virtual registers. Returns the final (value, register) pair;
/// stops at PHI defs, non-copy defs, and copies from physical registers.
std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
    const VNInfo *VNI) const {
  unsigned Reg = this->Reg;

  while (!VNI->isPHIDef()) {
    SlotIndex Def = VNI->def;
    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
    assert(MI && "No defining instruction");
    // Only full copies can be followed; a non-copy or partial copy terminates
    // the chain at the current value.
    if (!MI->isFullCopy())
      return std::make_pair(VNI, Reg);
    unsigned SrcReg = MI->getOperand(1).getReg();
    // Physical registers have no tracked value numbers to follow into.
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
      return std::make_pair(VNI, Reg);

    const LiveInterval &LI = LIS->getInterval(SrcReg);
    const VNInfo *ValueIn;
    // No subrange involved.
    if (!SubRangeJoin || !LI.hasSubRanges()) {
      LiveQueryResult LRQ = LI.Query(Def);
      ValueIn = LRQ.valueIn();
    } else {
      // Query subranges. Pick the first matching one.
      ValueIn = nullptr;
      for (const LiveInterval::SubRange &S : LI.subranges()) {
        // Transform lanemask to a mask in the joined live interval.
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
        if ((SMask & LaneMask).none())
          continue;
        LiveQueryResult LRQ = S.Query(Def);
        ValueIn = LRQ.valueIn();
        break;
      }
    }
    // No incoming value at the copy: stop and return what we have so far.
    if (ValueIn == nullptr)
      break;
    VNI = ValueIn;
    Reg = SrcReg;
  }
  return std::make_pair(VNI, Reg);
}
2230 | | |
2231 | | bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, |
2232 | 155k | const JoinVals &Other) const { |
2233 | 155k | const VNInfo *Orig0; |
2234 | 155k | unsigned Reg0; |
2235 | 155k | std::tie(Orig0, Reg0) = followCopyChain(Value0); |
2236 | 155k | if (Orig0 == Value1) |
2237 | 5.39k | return true; |
2238 | 149k | |
2239 | 149k | const VNInfo *Orig1; |
2240 | 149k | unsigned Reg1; |
2241 | 149k | std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); |
2242 | 149k | |
2243 | 149k | // The values are equal if they are defined at the same place and use the |
2244 | 149k | // same register. Note that we cannot compare VNInfos directly as some of |
2245 | 149k | // them might be from a copy created in mergeSubRangeInto() while the other |
2246 | 149k | // is from the original LiveInterval. |
2247 | 15.5k | return Orig0->def == Orig1->def && Reg0 == Reg1; |
2248 | 155k | } |
2249 | | |
/// Analyze value number ValNo of LR, filling in Vals[ValNo], and classify how
/// its overlap (if any) with Other's live range can be resolved. The hard
/// cases are left as CR_Unresolved for resolveConflicts() to examine later.
JoinVals::ConflictResolution
JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  assert(!V.isAnalyzed() && "Value has already been analyzed!");
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  // Unused values have no defining instruction to analyze; keep them as-is.
  if (VNI->isUnused()) {
    V.WriteLanes = LaneBitmask::getAll();
    return CR_Keep;
  }

  // Get the instruction defining this value, compute the lanes written.
  const MachineInstr *DefMI = nullptr;
  if (VNI->isPHIDef()) {
    // Conservatively assume that all lanes in a PHI are valid.
    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
                                     : TRI->getSubRegIndexLaneMask(SubIdx);
    V.ValidLanes = V.WriteLanes = Lanes;
  } else {
    DefMI = Indexes->getInstructionFromIndex(VNI->def);
    assert(DefMI != nullptr);
    if (SubRangeJoin) {
      // We don't care about the lanes when joining subregister ranges.
      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
      if (DefMI->isImplicitDef()) {
        V.ValidLanes = LaneBitmask::getNone();
        V.ErasableImplicitDef = true;
      }
    } else {
      bool Redef = false;
      V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);

      // If this is a read-modify-write instruction, there may be more valid
      // lanes than the ones written by this instruction.
      // This only covers partial redef operands. DefMI may have normal use
      // operands reading the register. They don't contribute valid lanes.
      //
      // This adds ssub1 to the set of valid lanes in %src:
      //
      //   %src:ssub1<def> = FOO
      //
      // This leaves only ssub1 valid, making any other lanes undef:
      //
      //   %src:ssub1<def,read-undef> = FOO %src:ssub2
      //
      // The <read-undef> flag on the def operand means that old lane values are
      // not important.
      if (Redef) {
        V.RedefVNI = LR.Query(VNI->def).valueIn();
        assert((TrackSubRegLiveness || V.RedefVNI) &&
               "Instruction is reading nonexistent value");
        if (V.RedefVNI != nullptr) {
          // The redefined value must be analyzed first (always moving up the
          // dominator tree), and its valid lanes carry over to this value.
          computeAssignment(V.RedefVNI->id, Other);
          V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
        }
      }

      // An IMPLICIT_DEF writes undef values.
      if (DefMI->isImplicitDef()) {
        // We normally expect IMPLICIT_DEF values to be live only until the end
        // of their block. If the value is really live longer and gets pruned in
        // another block, this flag is cleared again.
        V.ErasableImplicitDef = true;
        V.ValidLanes &= ~V.WriteLanes;
      }
    }
  }

  // Find the value in Other that overlaps VNI->def, if any.
  LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);

  // It is possible that both values are defined by the same instruction, or
  // the values are PHIs defined in the same block. When that happens, the two
  // values should be merged into one, but not into any preceding value.
  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");

    // One value stays, the other is merged. Keep the earlier one, or the first
    // one we see.
    if (OtherVNI->def < VNI->def)
      Other.computeAssignment(OtherVNI->id, *this);
    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
      // This is an early-clobber def overlapping a live-in value in the other
      // register. Not mergeable.
      V.OtherVNI = OtherLRQ.valueIn();
      return CR_Impossible;
    }
    V.OtherVNI = OtherVNI;
    Val &OtherV = Other.Vals[OtherVNI->id];
    // Keep this value, check for conflicts when analyzing OtherVNI.
    if (!OtherV.isAnalyzed())
      return CR_Keep;
    // Both sides have been analyzed now.
    // Allow overlapping PHI values. Any real interference would show up in a
    // predecessor, the PHI itself can't introduce any conflicts.
    if (VNI->isPHIDef())
      return CR_Merge;
    if ((V.ValidLanes & OtherV.ValidLanes).any())
      // Overlapping lanes can't be resolved.
      return CR_Impossible;
    else
      return CR_Merge;
  }

  // No simultaneous def. Is Other live at the def?
  V.OtherVNI = OtherLRQ.valueIn();
  if (!V.OtherVNI)
    // No overlap, no conflict.
    return CR_Keep;

  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");

  // We have overlapping values, or possibly a kill of Other.
  // Recursively compute assignments up the dominator tree.
  Other.computeAssignment(V.OtherVNI->id, *this);
  Val &OtherV = Other.Vals[V.OtherVNI->id];

  // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
  // This shouldn't normally happen, but ProcessImplicitDefs can leave such
  // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
  // technically.
  //
  // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
  // to erase the IMPLICIT_DEF instruction.
  if (OtherV.ErasableImplicitDef && DefMI &&
      DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
    DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
                 << " extends into BB#" << DefMI->getParent()->getNumber()
                 << ", keeping it.\n");
    OtherV.ErasableImplicitDef = false;
  }

  // Allow overlapping PHI values. Any real interference would show up in a
  // predecessor, the PHI itself can't introduce any conflicts.
  if (VNI->isPHIDef())
    return CR_Replace;

  // Check for simple erasable conflicts.
  if (DefMI->isImplicitDef()) {
    // We need the def for the subregister if there is nothing else live at the
    // subrange at this point.
    if (TrackSubRegLiveness
        && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
      return CR_Replace;
    return CR_Erase;
  }

  // Include the non-conflict where DefMI is a coalescable copy that kills
  // OtherVNI. We still want the copy erased and value numbers merged.
  if (CP.isCoalescable(DefMI)) {
    // Some of the lanes copied from OtherVNI may be undef, making them undef
    // here too.
    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
    return CR_Erase;
  }

  // This may not be a real conflict if DefMI simply kills Other and defines
  // VNI.
  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
    return CR_Keep;

  // Handle the case where VNI and OtherVNI can be proven to be identical:
  //
  //   %other = COPY %ext
  //   %this  = COPY %ext <-- Erase this copy
  //
  if (DefMI->isFullCopy() && !CP.isPartial()
      && valuesIdentical(VNI, V.OtherVNI, Other))
    return CR_Erase;

  // If the lanes written by this instruction were all undef in OtherVNI, it is
  // still safe to join the live ranges. This can't be done with a simple value
  // mapping, though - OtherVNI will map to multiple values:
  //
  //   1 %dst:ssub0 = FOO                <-- OtherVNI
  //   2 %src = BAR                      <-- VNI
  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
  //   4 BAZ %dst<kill>
  //   5 QUUX %src<kill>
  //
  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
  // handles this complex value mapping.
  if ((V.WriteLanes & OtherV.ValidLanes).none())
    return CR_Replace;

  // If the other live range is killed by DefMI and the live ranges are still
  // overlapping, it must be because we're looking at an early clobber def:
  //
  //   %dst<def,early-clobber> = ASM %src<kill>
  //
  // In this case, it is illegal to merge the two live ranges since the early
  // clobber def would clobber %src before it was read.
  if (OtherLRQ.isKill()) {
    // This case where the def doesn't overlap the kill is handled above.
    assert(VNI->def.isEarlyClobber() &&
           "Only early clobber defs can overlap a kill");
    return CR_Impossible;
  }

  // VNI is clobbering live lanes in OtherVNI, but there is still the
  // possibility that no instructions actually read the clobbered lanes.
  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
  // Otherwise Other.RI wouldn't be live here.
  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
    return CR_Impossible;

  // We need to verify that no instructions are reading the clobbered lanes. To
  // save compile time, we'll only check that locally. Don't allow the tainted
  // value to escape the basic block.
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
    return CR_Impossible;

  // There are still some things that could go wrong besides clobbered lanes
  // being read, for example OtherVNI may be only partially redefined in MBB,
  // and some clobbered lanes could escape the block. Save this analysis for
  // resolveConflicts() when all values have been mapped. We need to know
  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
  // that now - the recursive analyzeValue() calls must go upwards in the
  // dominator tree.
  return CR_Unresolved;
}
2472 | | |
/// Compute the NewVNInfo assignment for value number ValNo, running
/// analyzeValue() on it first if needed. Merged values reuse the other side's
/// assignment; every other resolution appends a fresh NewVNInfo entry.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  if (V.isAnalyzed()) {
    // Recursion should always move up the dominator tree, so ValNo is not
    // supposed to reappear before it has been assigned.
    assert(Assignments[ValNo] != -1 && "Bad recursion?");
    return;
  }
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
  case CR_Erase:
  case CR_Merge:
    // Merge this ValNo into OtherVNI.
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
    DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
                 << LR.getValNumInfo(ValNo)->def << " into "
                 << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                 << V.OtherVNI->def << " --> @"
                 << NewVNInfo[Assignments[ValNo]]->def << '\n');
    break;
  case CR_Replace:
  case CR_Unresolved: {
    // The other value is going to be pruned if this join is successful.
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
    Val &OtherV = Other.Vals[V.OtherVNI->id];
    // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
    // its lanes.
    if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
      OtherV.ErasableImplicitDef = false;
    OtherV.Pruned = true;
    // Deliberate fallthrough: pruned values still need their own entry in the
    // final live range.
    LLVM_FALLTHROUGH;
  }
  default:
    // This value number needs to go in the final joined live range.
    Assignments[ValNo] = NewVNInfo.size();
    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
    break;
  }
}
2513 | | |
2514 | 12.5M | bool JoinVals::mapValues(JoinVals &Other) { |
2515 | 42.2M | for (unsigned i = 0, e = LR.getNumValNums(); i != e42.2M ; ++i29.7M ) { |
2516 | 30.1M | computeAssignment(i, Other); |
2517 | 30.1M | if (Vals[i].Resolution == CR_Impossible30.1M ) { |
2518 | 436k | DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i |
2519 | 436k | << '@' << LR.getValNumInfo(i)->def << '\n'); |
2520 | 436k | return false; |
2521 | 436k | } |
2522 | 30.1M | } |
2523 | 12.1M | return true; |
2524 | 12.5M | } |
2525 | | |
/// Compute how far the lanes tainted by ValNo's clobbering def extend within
/// its basic block, recording (end, lanes) pairs in TaintExtent. Returns false
/// if the tainted lanes live out of the block, which aborts the join.
bool JoinVals::
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);

  // Scan Other.LR from VNI.def to MBBEnd.
  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
  assert(OtherI != Other.LR.end() && "No conflict?");
  do {
    // OtherI is pointing to a tainted value. Abort the join if the tainted
    // lanes escape the block.
    SlotIndex End = OtherI->end;
    if (End >= MBBEnd) {
      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
                   << OtherI->valno->id << '@' << OtherI->start << '\n');
      return false;
    }
    DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
                 << OtherI->valno->id << '@' << OtherI->start
                 << " to " << End << '\n');
    // A dead def is not a problem.
    if (End.isDead())
      break;
    TaintExtent.push_back(std::make_pair(End, TaintedLanes));

    // Check for another def in the MBB.
    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
      break;

    // Lanes written by the new def are no longer tainted.
    const Val &OV = Other.Vals[OtherI->valno->id];
    TaintedLanes &= ~OV.WriteLanes;
    // Without a redefined value there is nothing further to track.
    if (!OV.RedefVNI)
      break;
  } while (TaintedLanes.any());
  return true;
}
2565 | | |
2566 | | bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, |
2567 | 547k | LaneBitmask Lanes) const { |
2568 | 547k | if (MI.isDebugValue()) |
2569 | 2 | return false; |
2570 | 547k | for (const MachineOperand &MO : MI.operands()) 547k { |
2571 | 1.96M | if (!MO.isReg() || 1.96M MO.isDef()1.45M || MO.getReg() != Reg913k ) |
2572 | 1.85M | continue; |
2573 | 106k | if (106k !MO.readsReg()106k ) |
2574 | 4 | continue; |
2575 | 106k | unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); |
2576 | 106k | if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any()) |
2577 | 21.2k | return true; |
2578 | 526k | } |
2579 | 526k | return false; |
2580 | 526k | } |
2581 | | |
/// Attempt to resolve the remaining CR_Unresolved conflicts against \p Other.
/// A conflict where this value clobbers some lanes of the other value can be
/// downgraded to CR_Replace if no instruction between the clobbering def and
/// the end of the tainted region actually reads the tainted lanes.
/// Returns false if any conflict cannot be resolved this way.
bool JoinVals::resolveConflicts(JoinVals &Other) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
    if (V.Resolution != CR_Unresolved)
      continue;
    DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
                 << '@' << LR.getValNumInfo(i)->def << '\n');
    // Lane-by-lane analysis is not available when joining subranges.
    if (SubRangeJoin)
      return false;

    ++NumLaneConflicts;
    assert(V.OtherVNI && "Inconsistent conflict resolution.");
    VNInfo *VNI = LR.getValNumInfo(i);
    const Val &OtherV = Other.Vals[V.OtherVNI->id];

    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    // join, those lanes will be tainted with a wrong value. Get the extent of
    // the tainted lanes.
    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
      // Tainted lanes would extend beyond the basic block.
      return false;

    assert(!TaintExtent.empty() && "There should be at least one conflict.");

    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    MachineBasicBlock::iterator MI = MBB->begin();
    if (!VNI->isPHIDef()) {
      MI = Indexes->getInstructionFromIndex(VNI->def);
      // No need to check the instruction defining VNI for reads.
      ++MI;
    }
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
           "Interference ends on VNI->def. Should have been handled earlier");
    MachineInstr *LastMI =
      Indexes->getInstructionFromIndex(TaintExtent.front().first);
    assert(LastMI && "Range must end at a proper instruction");
    unsigned TaintNum = 0;
    // Walk the instructions covered by each taint segment. Any read of a
    // currently-tainted lane defeats the resolution; reaching the end of the
    // last segment means the tainted lanes are never observed.
    while (true) {
      assert(MI != MBB->end() && "Bad LastMI");
      if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
        return false;
      }
      // LastMI is the last instruction to use the current value.
      if (&*MI == LastMI) {
        if (++TaintNum == TaintExtent.size())
          break;
        // Advance to the next taint segment and its (possibly different)
        // set of tainted lanes.
        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
        assert(LastMI && "Range must end at a proper instruction");
        TaintedLanes = TaintExtent[TaintNum].second;
      }
      ++MI;
    }

    // The tainted lanes are unused.
    V.Resolution = CR_Replace;
    ++NumLaneResolves;
  }
  return true;
}
2646 | | |
2647 | 12.4M | bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { |
2648 | 12.4M | Val &V = Vals[ValNo]; |
2649 | 12.4M | if (V.Pruned || 12.4M V.PrunedComputed12.4M ) |
2650 | 136k | return V.Pruned; |
2651 | 12.3M | |
2652 | 12.3M | if (12.3M V.Resolution != CR_Erase && 12.3M V.Resolution != CR_Merge6.10M ) |
2653 | 6.10M | return V.Pruned; |
2654 | 6.24M | |
2655 | 6.24M | // Follow copies up the dominator tree and check if any intermediate value |
2656 | 6.24M | // has been pruned. |
2657 | 6.24M | V.PrunedComputed = true; |
2658 | 6.24M | V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); |
2659 | 6.24M | return V.Pruned; |
2660 | 6.24M | } |
2661 | | |
/// Prune the live ranges so that LiveInterval::join() can merge them.
/// CR_Replace values cause the overlapping part of Other.LR to be removed;
/// values that are copies of pruned values cause their own range to be
/// pruned. Removed range pieces are recorded in \p EndPoints so liveness can
/// be restored after joining. When \p changeInstrs is true, defining
/// instructions have their operand flags fixed up for the merged range.
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints,
                           bool changeInstrs) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LR.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LR.
      LIS->pruneValue(Other.LR, Def, &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef = OtherV.ErasableImplicitDef &&
                         OtherV.Resolution == CR_Keep;
      // Def.isBlock() means a live-in PHI value; there is no defining
      // instruction to fix up in that case.
      if (!Def.isBlock()) {
        if (changeInstrs) {
          // Remove <def,read-undef> flags. This def is now a partial redef.
          // Also remove <def,dead> flags since the joined live range will
          // continue past this instruction.
          for (MachineOperand &MO :
               Indexes->getInstructionFromIndex(Def)->operands()) {
            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
              if (MO.getSubReg() != 0)
                MO.setIsUndef(EraseImpDef);
              MO.setIsDead(false);
            }
          }
        }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Def);
      }
      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
                   << ": " << Other.LR << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(i, Other)) {
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(LR, Def, &EndPoints);
        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
                     << Def << ": " << LR << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      llvm_unreachable("Unresolved conflicts");
    }
  }
}
2721 | | |
/// Adjust the subranges of \p LI for value numbers that eraseInstrs() will
/// remove. A subrange value defined exactly at an erased copy is pruned;
/// a subrange value that merely ends there is recorded in \p ShrinkMask so
/// the caller can recompute it from its remaining uses.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
  // Look for values being erased.
  bool DidPrune = false;
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    // We should trigger in all cases in which eraseInstrs() does something:
    // values resolved as CR_Erase, and pruned erasable IMPLICIT_DEF values
    // that were kept via CR_Keep. The selection here must match
    // eraseInstrs()'s.
    if (Vals[i].Resolution != CR_Erase &&
        (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
         !Vals[i].Pruned))
      continue;

    // Check subranges at the point where the copy will be removed.
    SlotIndex Def = LR.getValNumInfo(i)->def;
    // Print message so mismatches with eraseInstrs() can be diagnosed.
    DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
    for (LiveInterval::SubRange &S : LI.subranges()) {
      LiveQueryResult Q = S.Query(Def);

      // If a subrange starts at the copy then an undefined value has been
      // copied and we must remove that subrange value as well.
      VNInfo *ValueOut = Q.valueOutOrDead();
      if (ValueOut != nullptr && Q.valueIn() == nullptr) {
        DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
                     << " at " << Def << "\n");
        LIS->pruneValue(S, Def, nullptr);
        DidPrune = true;
        // Mark value number as unused.
        ValueOut->markUnused();
        continue;
      }
      // If a subrange ends at the copy, then a value was copied but only
      // partially used later. Shrink the subregister range appropriately.
      if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
        DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
                     << " at " << Def << "\n");
        ShrinkMask |= S.LaneMask;
      }
    }
  }
  // Pruning may have emptied some subranges entirely; drop them.
  if (DidPrune)
    LI.removeEmptySubRanges();
}
2764 | | |
2765 | | /// Check if any of the subranges of @p LI contain a definition at @p Def. |
2766 | 169k | static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { |
2767 | 383k | for (LiveInterval::SubRange &SR : LI.subranges()) { |
2768 | 383k | if (VNInfo *VNI = SR.Query(Def).valueOutOrDead()) |
2769 | 379k | if (379k VNI->def == Def379k ) |
2770 | 169k | return true; |
2771 | 0 | } |
2772 | 0 | return false; |
2773 | 0 | } |
2774 | | |
2775 | 127k | void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { |
2776 | 127k | assert(&static_cast<LiveRange&>(LI) == &LR); |
2777 | 127k | |
2778 | 440k | for (unsigned i = 0, e = LR.getNumValNums(); i != e440k ; ++i313k ) { |
2779 | 313k | if (Vals[i].Resolution != CR_Keep) |
2780 | 142k | continue; |
2781 | 170k | VNInfo *VNI = LR.getValNumInfo(i); |
2782 | 170k | if (VNI->isUnused() || 170k VNI->isPHIDef()170k || isDefInSubRange(LI, VNI->def)169k ) |
2783 | 170k | continue; |
2784 | 0 | Vals[i].Pruned = true; |
2785 | 0 | ShrinkMainRange = true; |
2786 | 0 | } |
2787 | 127k | } |
2788 | | |
2789 | 302k | void JoinVals::removeImplicitDefs() { |
2790 | 620k | for (unsigned i = 0, e = LR.getNumValNums(); i != e620k ; ++i317k ) { |
2791 | 317k | Val &V = Vals[i]; |
2792 | 317k | if (V.Resolution != CR_Keep || 317k !V.ErasableImplicitDef169k || !V.Pruned74 ) |
2793 | 317k | continue; |
2794 | 1 | |
2795 | 1 | VNInfo *VNI = LR.getValNumInfo(i); |
2796 | 1 | VNI->markUnused(); |
2797 | 1 | LR.removeValNo(VNI); |
2798 | 1 | } |
2799 | 302k | } |
2800 | | |
/// Erase the instructions whose values were resolved as CR_Erase, and remove
/// pruned erasable IMPLICIT_DEF values (CR_Keep) from the live range along
/// with their instructions. Erased instructions are recorded in
/// \p ErasedInstrs; source registers of erased copies that belong to neither
/// side of the coalescer pair are pushed onto \p ShrinkRegs so the caller can
/// shrink them. \p LI, when non-null, is the interval owning LR and is used
/// to repair main-range segments around removed subregister defs.
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                           SmallVectorImpl<unsigned> &ShrinkRegs,
                           LiveInterval *LI) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    // Get the def location before markUnused() below invalidates it.
    SlotIndex Def = LR.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep: {
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
      // longer. The IMPLICIT_DEF instructions are only inserted by
      // PHIElimination to guarantee that all PHI predecessors have a value.
      if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
        break;
      // Remove value number i from LR.
      // For intervals with subranges, removing a segment from the main range
      // may require extending the previous segment: for each definition of
      // a subregister, there will be a corresponding def in the main range.
      // That def may fall in the middle of a segment from another subrange.
      // In such cases, removing this def from the main range must be
      // complemented by extending the main range to account for the liveness
      // of the other subrange.
      VNInfo *VNI = LR.getValNumInfo(i);
      // NOTE(review): this shadows the outer Def with the same value.
      SlotIndex Def = VNI->def;
      // The new end point of the main range segment to be extended.
      SlotIndex NewEnd;
      if (LI != nullptr) {
        LiveRange::iterator I = LR.FindSegmentContaining(Def);
        assert(I != LR.end());
        // Do not extend beyond the end of the segment being removed.
        // The segment may have been pruned in preparation for joining
        // live ranges.
        NewEnd = I->end;
      }

      LR.removeValNo(VNI);
      // Note that this VNInfo is reused and still referenced in NewVNInfo,
      // make it appear like an unused value number.
      VNI->markUnused();

      if (LI != nullptr && LI->hasSubRanges()) {
        assert(static_cast<LiveRange*>(LI) == &LR);
        // Determine the end point based on the subrange information:
        // minimum of (earliest def of next segment,
        //             latest end point of containing segment)
        SlotIndex ED, LE;
        for (LiveInterval::SubRange &SR : LI->subranges()) {
          LiveRange::iterator I = SR.find(Def);
          if (I == SR.end())
            continue;
          if (I->start > Def)
            ED = ED.isValid() ? std::min(ED, I->start) : I->start;
          else
            LE = LE.isValid() ? std::max(LE, I->end) : I->end;
        }
        if (LE.isValid())
          NewEnd = std::min(NewEnd, LE);
        if (ED.isValid())
          NewEnd = std::min(NewEnd, ED);

        // We only want to do the extension if there was a subrange that
        // was live across Def.
        if (LE.isValid()) {
          LiveRange::iterator S = LR.find(Def);
          if (S != LR.begin())
            std::prev(S)->end = NewEnd;
        }
      }
      DEBUG({
        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
        if (LI != nullptr)
          dbgs() << "\t\t LHS = " << *LI << '\n';
      });
      // Fall through to also erase the IMPLICIT_DEF instruction itself.
      LLVM_FALLTHROUGH;
    }

    case CR_Erase: {
      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
      assert(MI && "No instruction to erase");
      if (MI->isCopy()) {
        // The source of the erased copy may be left with dead segments;
        // queue it for shrinking unless it is one of the joined registers.
        unsigned Reg = MI->getOperand(1).getReg();
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
          ShrinkRegs.push_back(Reg);
      }
      ErasedInstrs.insert(MI);
      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      break;
    }
    default:
      break;
    }
  }
}
2896 | | |
/// Join \p RRange into \p LRange for the lanes in \p LaneMask. This mirrors
/// joinVirtRegs() but operates on a single pair of subregister ranges; since
/// the main range was already joined successfully, any failure here is a bug.
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                                         LaneBitmask LaneMask,
                                         const CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);
  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);

  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
  // We should be able to resolve all conflicts here as we could successfully do
  // it on the mainrange already. There is however a problem when multiple
  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
  // interferences.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }
  if (!LHSVals.resolveConflicts(RHSVals) ||
      !RHSVals.resolveConflicts(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, false);
  RHSVals.pruneValues(LHSVals, EndPoints, false);

  LHSVals.removeImplicitDefs();
  RHSVals.removeImplicitDefs();

  LRange.verify();
  RRange.verify();

  // Join RRange into LHS.
  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
              NewVNInfo);

  DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
  if (EndPoints.empty())
    return;

  // Recompute the parts of the live range we had to remove because of
  // CR_Replace conflicts.
  DEBUG({
    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
      dbgs() << EndPoints[i];
      if (i != n-1)
        dbgs() << ',';
    }
    dbgs() << ": " << LRange << '\n';
  });
  LIS->extendToIndices(LRange, EndPoints);
}
2958 | | |
2959 | | void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, |
2960 | | const LiveRange &ToMerge, |
2961 | | LaneBitmask LaneMask, |
2962 | 150k | CoalescerPair &CP) { |
2963 | 150k | BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); |
2964 | 150k | LI.refineSubRanges(Allocator, LaneMask, |
2965 | 151k | [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { |
2966 | 151k | if (SR.empty()151k ) { |
2967 | 215 | SR.assign(ToMerge, Allocator); |
2968 | 151k | } else { |
2969 | 151k | // joinSubRegRange() destroys the merged range, so we need a copy. |
2970 | 151k | LiveRange RangeCopy(ToMerge, Allocator); |
2971 | 151k | joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); |
2972 | 151k | } |
2973 | 151k | }); |
2974 | 150k | } |
2975 | | |
/// Join the live ranges of the two virtual registers in \p CP, merging RHS
/// (the source) into LHS (the destination). Returns false if the join is not
/// possible because of unresolvable value conflicts.
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);

  DEBUG(dbgs() << "\t\tRHS = " << RHS
               << "\n\t\tLHS = " << LHS
               << '\n');

  // First compute NewVNInfo and the simple value mappings.
  // Detect impossible conflicts early.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
    return false;

  // Some conflicts can only be resolved after all values have been mapped.
  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
    return false;

  // All clear, the live ranges can be merged.
  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();

    // Transform lanemasks from the LHS to masks in the coalesced register and
    // create initial subranges if necessary.
    unsigned DstIdx = CP.getDstIdx();
    if (!LHS.hasSubRanges()) {
      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(DstIdx);
      // LHS must support subregs or we wouldn't be in this codepath.
      assert(Mask.any());
      LHS.createSubRangeFrom(Allocator, Mask, LHS);
    } else if (DstIdx != 0) {
      // Transform LHS lanemasks to new register class if necessary.
      for (LiveInterval::SubRange &R : LHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
        R.LaneMask = Mask;
      }
    }
    DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
                 << ' ' << LHS << '\n');

    // Determine lanemasks of RHS in the coalesced register and merge subranges.
    unsigned SrcIdx = CP.getSrcIdx();
    if (!RHS.hasSubRanges()) {
      // RHS has a single lane mask covering all of it.
      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(SrcIdx);
      mergeSubRangeInto(LHS, RHS, Mask, CP);
    } else {
      // Pair up subranges and merge.
      for (LiveInterval::SubRange &R : RHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
        mergeSubRangeInto(LHS, R, Mask, CP);
      }
    }
    DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");

    // Pruning implicit defs from subranges may result in the main range
    // having stale segments.
    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);

    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, true);
  RHSVals.pruneValues(LHSVals, EndPoints, true);

  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
  // registers to require trimming.
  SmallVector<unsigned, 8> ShrinkRegs;
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  while (!ShrinkRegs.empty())
    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));

  // Join RHS into LHS.
  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);

  // Kill flags are going to be wrong if the live ranges were overlapping.
  // Eventually, we should simply clear all kill flags when computing live
  // ranges. They are reinserted after register allocation.
  MRI->clearKillFlags(LHS.reg);
  MRI->clearKillFlags(RHS.reg);

  if (!EndPoints.empty()) {
    // Recompute the parts of the live range we had to remove because of
    // CR_Replace conflicts.
    DEBUG({
      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
        dbgs() << EndPoints[i];
        if (i != n-1)
          dbgs() << ',';
      }
      dbgs() << ": " << LHS << '\n';
    });
    LIS->extendToIndices((LiveRange&)LHS, EndPoints);
  }

  return true;
}
3087 | | |
3088 | 7.28M | bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { |
3089 | 7.28M | return CP.isPhys() ? joinReservedPhysReg(CP)1.03M : joinVirtRegs(CP)6.24M ; |
3090 | 7.28M | } |
3091 | | |
namespace {

/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
  MachineBasicBlock *MBB; // The block whose copies will be coalesced.
  unsigned Depth;         // Loop depth of MBB; deeper blocks sort first.
  bool IsSplit;           // True if MBB came from splitting a critical edge.

  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
    : MBB(mbb), Depth(depth), IsSplit(issplit) {}
};

} // end anonymous namespace
3105 | | |
3106 | | /// C-style comparator that sorts first based on the loop depth of the basic |
3107 | | /// block (the unsigned), and then on the MBB number. |
3108 | | /// |
3109 | | /// EnableGlobalCopies assumes that the primary sort key is loop depth. |
3110 | | static int compareMBBPriority(const MBBPriorityInfo *LHS, |
3111 | 18.5M | const MBBPriorityInfo *RHS) { |
3112 | 18.5M | // Deeper loops first |
3113 | 18.5M | if (LHS->Depth != RHS->Depth) |
3114 | 3.00M | return LHS->Depth > RHS->Depth ? 3.00M -11.85M : 11.14M ; |
3115 | 15.5M | |
3116 | 15.5M | // Try to unsplit critical edges next. |
3117 | 15.5M | if (15.5M LHS->IsSplit != RHS->IsSplit15.5M ) |
3118 | 0 | return LHS->IsSplit ? 0 -10 : 10 ; |
3119 | 15.5M | |
3120 | 15.5M | // Prefer blocks that are more connected in the CFG. This takes care of |
3121 | 15.5M | // the most difficult copies first while intervals are short. |
3122 | 15.5M | unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); |
3123 | 15.5M | unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); |
3124 | 15.5M | if (cl != cr) |
3125 | 5.28M | return cl > cr ? 5.28M -12.31M : 12.97M ; |
3126 | 10.2M | |
3127 | 10.2M | // As a last resort, sort by block number. |
3128 | 10.2M | return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? 10.2M -14.88M : 15.39M ; |
3129 | 18.5M | } |
3130 | | |
3131 | | /// \returns true if the given copy uses or defines a local live range. |
3132 | 15.3M | static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { |
3133 | 15.3M | if (!Copy->isCopy()) |
3134 | 568k | return false; |
3135 | 14.8M | |
3136 | 14.8M | if (14.8M Copy->getOperand(1).isUndef()14.8M ) |
3137 | 14 | return false; |
3138 | 14.8M | |
3139 | 14.8M | unsigned SrcReg = Copy->getOperand(1).getReg(); |
3140 | 14.8M | unsigned DstReg = Copy->getOperand(0).getReg(); |
3141 | 14.8M | if (TargetRegisterInfo::isPhysicalRegister(SrcReg) |
3142 | 11.7M | || TargetRegisterInfo::isPhysicalRegister(DstReg)) |
3143 | 8.72M | return false; |
3144 | 6.08M | |
3145 | 6.08M | return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) |
3146 | 2.07M | || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); |
3147 | 15.3M | } |
3148 | | |
3149 | | bool RegisterCoalescer:: |
3150 | 6.16M | copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { |
3151 | 6.16M | bool Progress = false; |
3152 | 26.7M | for (unsigned i = 0, e = CurrList.size(); i != e26.7M ; ++i20.5M ) { |
3153 | 20.5M | if (!CurrList[i]) |
3154 | 1.92M | continue; |
3155 | 18.6M | // Skip instruction pointers that have already been erased, for example by |
3156 | 18.6M | // dead code elimination. |
3157 | 18.6M | if (18.6M ErasedInstrs.count(CurrList[i])18.6M ) { |
3158 | 129k | CurrList[i] = nullptr; |
3159 | 129k | continue; |
3160 | 129k | } |
3161 | 18.5M | bool Again = false; |
3162 | 18.5M | bool Success = joinCopy(CurrList[i], Again); |
3163 | 18.5M | Progress |= Success; |
3164 | 18.5M | if (Success || 18.5M !Again10.9M ) |
3165 | 13.3M | CurrList[i] = nullptr; |
3166 | 20.5M | } |
3167 | 6.16M | return Progress; |
3168 | 6.16M | } |
3169 | | |
3170 | | /// Check if DstReg is a terminal node. |
3171 | | /// I.e., it does not have any affinity other than \p Copy. |
3172 | | static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, |
3173 | 21 | const MachineRegisterInfo *MRI) { |
3174 | 21 | assert(Copy.isCopyLike()); |
3175 | 21 | // Check if the destination of this copy as any other affinity. |
3176 | 21 | for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) |
3177 | 48 | if (48 &MI != &Copy && 48 MI.isCopyLike()33 ) |
3178 | 19 | return false; |
3179 | 2 | return true; |
3180 | 2 | } |
3181 | | |
3182 | 15.6M | bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { |
3183 | 15.6M | assert(Copy.isCopyLike()); |
3184 | 15.6M | if (!UseTerminalRule) |
3185 | 15.6M | return false; |
3186 | 33 | unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; |
3187 | 33 | isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); |
3188 | 33 | // Check if the destination of this copy has any other affinity. |
3189 | 33 | if (TargetRegisterInfo::isPhysicalRegister(DstReg) || |
3190 | 33 | // If SrcReg is a physical register, the copy won't be coalesced. |
3191 | 33 | // Ignoring it may have other side effect (like missing |
3192 | 33 | // rematerialization). So keep it. |
3193 | 25 | TargetRegisterInfo::isPhysicalRegister(SrcReg) || |
3194 | 20 | !isTerminalReg(DstReg, Copy, MRI)) |
3195 | 31 | return false; |
3196 | 2 | |
3197 | 2 | // DstReg is a terminal node. Check if it interferes with any other |
3198 | 2 | // copy involving SrcReg. |
3199 | 2 | const MachineBasicBlock *OrigBB = Copy.getParent(); |
3200 | 2 | const LiveInterval &DstLI = LIS->getInterval(DstReg); |
3201 | 6 | for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { |
3202 | 6 | // Technically we should check if the weight of the new copy is |
3203 | 6 | // interesting compared to the other one and update the weight |
3204 | 6 | // of the copies accordingly. However, this would only work if |
3205 | 6 | // we would gather all the copies first then coalesce, whereas |
3206 | 6 | // right now we interleave both actions. |
3207 | 6 | // For now, just consider the copies that are in the same block. |
3208 | 6 | if (&MI == &Copy || 6 !MI.isCopyLike()4 || MI.getParent() != OrigBB1 ) |
3209 | 5 | continue; |
3210 | 1 | unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; |
3211 | 1 | isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, |
3212 | 1 | OtherSubReg); |
3213 | 1 | if (OtherReg == SrcReg) |
3214 | 0 | OtherReg = OtherSrcReg; |
3215 | 1 | // Check if OtherReg is a non-terminal. |
3216 | 1 | if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || |
3217 | 1 | isTerminalReg(OtherReg, MI, MRI)) |
3218 | 0 | continue; |
3219 | 1 | // Check that OtherReg interfere with DstReg. |
3220 | 1 | if (1 LIS->getInterval(OtherReg).overlaps(DstLI)1 ) { |
3221 | 1 | DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); |
3222 | 1 | return true; |
3223 | 1 | } |
3224 | 1 | } |
3225 | 1 | return false; |
3226 | 1 | } |
3227 | | |
/// Collect all copy-like instructions in \p MBB onto the work lists (local
/// copies separated from global ones when JoinGlobalCopies is on, and
/// terminal-rule copies deferred to the end), then immediately try to
/// coalesce the newly collected batch.
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << MBB->getName() << ":\n");

  // Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet, it might invalidate the iterator.
  const unsigned PrevSize = WorkList.size();
  if (JoinGlobalCopies) {
    SmallVector<MachineInstr*, 2> LocalTerminals;
    SmallVector<MachineInstr*, 2> GlobalTerminals;
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
    // are not inherently easier to resolve, but slightly preferable until we
    // have local live range splitting. In particular this is required by
    // cmp+jmp macro fusion.
    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
         MII != E; ++MII) {
      if (!MII->isCopyLike())
        continue;
      bool ApplyTerminalRule = applyTerminalRule(*MII);
      if (isLocalCopy(&(*MII), LIS)) {
        if (ApplyTerminalRule)
          LocalTerminals.push_back(&(*MII));
        else
          LocalWorkList.push_back(&(*MII));
      } else {
        if (ApplyTerminalRule)
          GlobalTerminals.push_back(&(*MII));
        else
          WorkList.push_back(&(*MII));
      }
    }
    // Append the copies evicted by the terminal rule at the end of the list.
    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
  }
  else {
    SmallVector<MachineInstr*, 2> Terminals;
    for (MachineInstr &MII : *MBB)
      if (MII.isCopyLike()) {
        if (applyTerminalRule(MII))
          Terminals.push_back(&MII);
        else
          WorkList.push_back(&MII);
      }
    // Append the copies evicted by the terminal rule at the end of the list.
    WorkList.append(Terminals.begin(), Terminals.end());
  }
  // Try coalescing the collected copies immediately, and remove the nulls.
  // This prevents the WorkList from getting too large since most copies are
  // joinable on the first attempt.
  MutableArrayRef<MachineInstr*>
    CurrList(WorkList.begin() + PrevSize, WorkList.end());
  if (copyCoalesceWorkList(CurrList))
    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
                               nullptr), WorkList.end());
}
3284 | | |
3285 | 1.31M | void RegisterCoalescer::coalesceLocals() { |
3286 | 1.31M | copyCoalesceWorkList(LocalWorkList); |
3287 | 6.40M | for (unsigned j = 0, je = LocalWorkList.size(); j != je6.40M ; ++j5.09M ) { |
3288 | 5.09M | if (LocalWorkList[j]) |
3289 | 87.7k | WorkList.push_back(LocalWorkList[j]); |
3290 | 5.09M | } |
3291 | 1.31M | LocalWorkList.clear(); |
3292 | 1.31M | } |
3293 | | |
3294 | 588k | void RegisterCoalescer::joinAllIntervals() { |
3295 | 588k | DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); |
3296 | 588k | assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); |
3297 | 588k | |
3298 | 588k | std::vector<MBBPriorityInfo> MBBs; |
3299 | 588k | MBBs.reserve(MF->size()); |
3300 | 4.83M | for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E4.83M ; ++I4.24M ) { |
3301 | 4.24M | MachineBasicBlock *MBB = &*I; |
3302 | 4.24M | MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), |
3303 | 0 | JoinSplitEdges && isSplitEdge(MBB))); |
3304 | 4.24M | } |
3305 | 588k | array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); |
3306 | 588k | |
3307 | 588k | // Coalesce intervals in MBB priority order. |
3308 | 588k | unsigned CurrDepth = std::numeric_limits<unsigned>::max(); |
3309 | 4.83M | for (unsigned i = 0, e = MBBs.size(); i != e4.83M ; ++i4.24M ) { |
3310 | 4.24M | // Try coalescing the collected local copies for deeper loops. |
3311 | 4.24M | if (JoinGlobalCopies && 4.24M MBBs[i].Depth < CurrDepth4.16M ) { |
3312 | 724k | coalesceLocals(); |
3313 | 724k | CurrDepth = MBBs[i].Depth; |
3314 | 724k | } |
3315 | 4.24M | copyCoalesceInMBB(MBBs[i].MBB); |
3316 | 4.24M | } |
3317 | 588k | coalesceLocals(); |
3318 | 588k | |
3319 | 588k | // Joining intervals can allow other intervals to be joined. Iteratively join |
3320 | 588k | // until we make no progress. |
3321 | 600k | while (copyCoalesceWorkList(WorkList)) |
3322 | 11.7k | /* empty */ ; |
3323 | 588k | } |
3324 | | |
3325 | 588k | void RegisterCoalescer::releaseMemory() { |
3326 | 588k | ErasedInstrs.clear(); |
3327 | 588k | WorkList.clear(); |
3328 | 588k | DeadDefs.clear(); |
3329 | 588k | InflateRegs.clear(); |
3330 | 588k | } |
3331 | | |
3332 | 588k | bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { |
3333 | 588k | MF = &fn; |
3334 | 588k | MRI = &fn.getRegInfo(); |
3335 | 588k | const TargetSubtargetInfo &STI = fn.getSubtarget(); |
3336 | 588k | TRI = STI.getRegisterInfo(); |
3337 | 588k | TII = STI.getInstrInfo(); |
3338 | 588k | LIS = &getAnalysis<LiveIntervals>(); |
3339 | 588k | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
3340 | 588k | Loops = &getAnalysis<MachineLoopInfo>(); |
3341 | 588k | if (EnableGlobalCopies == cl::BOU_UNSET) |
3342 | 588k | JoinGlobalCopies = STI.enableJoinGlobalCopies(); |
3343 | 588k | else |
3344 | 4 | JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); |
3345 | 588k | |
3346 | 588k | // The MachineScheduler does not currently require JoinSplitEdges. This will |
3347 | 588k | // either be enabled unconditionally or replaced by a more general live range |
3348 | 588k | // splitting optimization. |
3349 | 588k | JoinSplitEdges = EnableJoinSplits; |
3350 | 588k | |
3351 | 588k | DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" |
3352 | 588k | << "********** Function: " << MF->getName() << '\n'); |
3353 | 588k | |
3354 | 588k | if (VerifyCoalescing) |
3355 | 46 | MF->verify(this, "Before register coalescing"); |
3356 | 588k | |
3357 | 588k | RegClassInfo.runOnMachineFunction(fn); |
3358 | 588k | |
3359 | 588k | // Join (coalesce) intervals if requested. |
3360 | 588k | if (EnableJoining) |
3361 | 588k | joinAllIntervals(); |
3362 | 588k | |
3363 | 588k | // After deleting a lot of copies, register classes may be less constrained. |
3364 | 588k | // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> |
3365 | 588k | // DPR inflation. |
3366 | 588k | array_pod_sort(InflateRegs.begin(), InflateRegs.end()); |
3367 | 588k | InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), |
3368 | 588k | InflateRegs.end()); |
3369 | 588k | DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); |
3370 | 597k | for (unsigned i = 0, e = InflateRegs.size(); i != e597k ; ++i8.91k ) { |
3371 | 8.91k | unsigned Reg = InflateRegs[i]; |
3372 | 8.91k | if (MRI->reg_nodbg_empty(Reg)) |
3373 | 1.17k | continue; |
3374 | 7.73k | if (7.73k MRI->recomputeRegClass(Reg)7.73k ) { |
3375 | 2.48k | DEBUG(dbgs() << PrintReg(Reg) << " inflated to " |
3376 | 2.48k | << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); |
3377 | 2.48k | ++NumInflated; |
3378 | 2.48k | |
3379 | 2.48k | LiveInterval &LI = LIS->getInterval(Reg); |
3380 | 2.48k | if (LI.hasSubRanges()2.48k ) { |
3381 | 0 | // If the inflated register class does not support subregisters anymore |
3382 | 0 | // remove the subranges. |
3383 | 0 | if (!MRI->shouldTrackSubRegLiveness(Reg)0 ) { |
3384 | 0 | LI.clearSubRanges(); |
3385 | 0 | } else { |
3386 | | #ifndef NDEBUG |
3387 | | LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); |
3388 | | // If subranges are still supported, then the same subregs |
3389 | | // should still be supported. |
3390 | | for (LiveInterval::SubRange &S : LI.subranges()) { |
3391 | | assert((S.LaneMask & ~MaxMask).none()); |
3392 | | } |
3393 | | #endif |
3394 | | } |
3395 | 0 | } |
3396 | 2.48k | } |
3397 | 8.91k | } |
3398 | 588k | |
3399 | 588k | DEBUG(dump()); |
3400 | 588k | if (VerifyCoalescing) |
3401 | 46 | MF->verify(this, "After register coalescing"); |
3402 | 588k | return true; |
3403 | 588k | } |
3404 | | |
3405 | 0 | void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { |
3406 | 0 | LIS->print(O, m); |
3407 | 0 | } |