/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonExpandCondsets.cpp ------------------------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | // Replace mux instructions with the corresponding legal instructions. |
11 | | // It is meant to work post-SSA, but still on virtual registers. It was |
12 | | // originally placed between register coalescing and machine instruction |
13 | | // scheduler. |
14 | | // In this place in the optimization sequence, live interval analysis had |
15 | | // been performed, and the live intervals should be preserved. A large part |
16 | | // of the code deals with preserving the liveness information. |
17 | | // |
18 | | // Liveness tracking aside, the main functionality of this pass is divided |
19 | | // into two steps. The first step is to replace an instruction |
20 | | // vreg0 = C2_mux vreg1, vreg2, vreg3 |
21 | | // with a pair of conditional transfers |
22 | | // vreg0 = A2_tfrt vreg1, vreg2 |
23 | | // vreg0 = A2_tfrf vreg1, vreg3 |
24 | | // It is the intention that the execution of this pass could be terminated |
25 | | // after this step, and the code generated would be functionally correct. |
26 | | // |
27 | | // If the uses of the source values vreg1 and vreg2 are kills, and their |
28 | | // definitions are predicable, then in the second step, the conditional |
29 | | // transfers will then be rewritten as predicated instructions. E.g. |
30 | | // vreg0 = A2_or vreg1, vreg2 |
31 | | // vreg3 = A2_tfrt vreg99, vreg0<kill> |
32 | | // will be rewritten as |
33 | | // vreg3 = A2_port vreg99, vreg1, vreg2 |
34 | | // |
35 | | // This replacement has two variants: "up" and "down". Consider this case: |
36 | | // vreg0 = A2_or vreg1, vreg2 |
37 | | // ... [intervening instructions] ... |
38 | | // vreg3 = A2_tfrt vreg99, vreg0<kill> |
39 | | // variant "up": |
40 | | // vreg3 = A2_port vreg99, vreg1, vreg2 |
41 | | // ... [intervening instructions, vreg0->vreg3] ... |
42 | | // [deleted] |
43 | | // variant "down": |
44 | | // [deleted] |
45 | | // ... [intervening instructions] ... |
46 | | // vreg3 = A2_port vreg99, vreg1, vreg2 |
47 | | // |
48 | | // Both, one or none of these variants may be valid, and checks are made |
49 | | // to rule out inapplicable variants. |
50 | | // |
51 | | // As an additional optimization, before either of the two steps above is |
52 | | // executed, the pass attempts to coalesce the target register with one of |
53 | | // the source registers, e.g. given an instruction |
54 | | // vreg3 = C2_mux vreg0, vreg1, vreg2 |
55 | | // vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds, |
56 | | // the instruction would then be (for example) |
57 | | // vreg3 = C2_mux vreg0, vreg3, vreg2 |
58 | | // and, under certain circumstances, this could result in only one predicated |
59 | | // instruction: |
60 | | // vreg3 = A2_tfrf vreg0, vreg2 |
61 | | // |
62 | | |
63 | | // Splitting a definition of a register into two predicated transfers |
64 | | // creates a complication in liveness tracking. Live interval computation |
65 | | // will see both instructions as actual definitions, and will mark the |
66 | | // first one as dead. The definition is not actually dead, and this |
67 | | // situation will need to be fixed. For example: |
68 | | // vreg1<def,dead> = A2_tfrt ... ; marked as dead |
69 | | // vreg1<def> = A2_tfrf ... |
70 | | // |
71 | | // Since any of the individual predicated transfers may end up getting |
72 | | // removed (in case it is an identity copy), some pre-existing def may |
73 | | // be marked as dead after live interval recomputation: |
74 | | // vreg1<def,dead> = ... ; marked as dead |
75 | | // ... |
76 | | // vreg1<def> = A2_tfrf ... ; if A2_tfrt is removed |
77 | | // This case happens if vreg1 was used as a source in A2_tfrt, which means |
78 | | // that is it actually live at the A2_tfrf, and so the now dead definition |
79 | | // of vreg1 will need to be updated to non-dead at some point. |
80 | | // |
81 | | // This issue could be remedied by adding implicit uses to the predicated |
82 | | // transfers, but this will create a problem with subsequent predication, |
83 | | // since the transfers will no longer be possible to reorder. To avoid |
84 | | // that, the initial splitting will not add any implicit uses. These |
85 | | // implicit uses will be added later, after predication. The extra price, |
86 | | // however, is that finding the locations where the implicit uses need |
87 | | // to be added, and updating the live ranges will be more involved. |
88 | | |
89 | | #include "HexagonInstrInfo.h" |
90 | | #include "HexagonRegisterInfo.h" |
91 | | #include "llvm/ADT/DenseMap.h" |
92 | | #include "llvm/ADT/SetVector.h" |
93 | | #include "llvm/ADT/SmallVector.h" |
94 | | #include "llvm/ADT/StringRef.h" |
95 | | #include "llvm/CodeGen/LiveInterval.h" |
96 | | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
97 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
98 | | #include "llvm/CodeGen/MachineDominators.h" |
99 | | #include "llvm/CodeGen/MachineFunction.h" |
100 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
101 | | #include "llvm/CodeGen/MachineInstr.h" |
102 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
103 | | #include "llvm/CodeGen/MachineOperand.h" |
104 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
105 | | #include "llvm/CodeGen/SlotIndexes.h" |
106 | | #include "llvm/IR/DebugLoc.h" |
107 | | #include "llvm/IR/Function.h" |
108 | | #include "llvm/MC/LaneBitmask.h" |
109 | | #include "llvm/Pass.h" |
110 | | #include "llvm/Support/CommandLine.h" |
111 | | #include "llvm/Support/Debug.h" |
112 | | #include "llvm/Support/ErrorHandling.h" |
113 | | #include "llvm/Support/raw_ostream.h" |
114 | | #include "llvm/Target/TargetRegisterInfo.h" |
115 | | #include "llvm/Target/TargetSubtargetInfo.h" |
116 | | #include <cassert> |
117 | | #include <iterator> |
118 | | #include <set> |
119 | | #include <utility> |
120 | | |
121 | | #define DEBUG_TYPE "expand-condsets" |
122 | | |
123 | | using namespace llvm; |
124 | | |
125 | | static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", |
126 | | cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); |
127 | | static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", |
128 | | cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); |
129 | | |
130 | | namespace llvm { |
131 | | |
132 | | void initializeHexagonExpandCondsetsPass(PassRegistry&); |
133 | | FunctionPass *createHexagonExpandCondsets(); |
134 | | |
135 | | } // end namespace llvm |
136 | | |
137 | | namespace { |
138 | | |
139 | | class HexagonExpandCondsets : public MachineFunctionPass { |
140 | | public: |
141 | | static char ID; |
142 | | |
143 | 401 | HexagonExpandCondsets() : MachineFunctionPass(ID) { |
144 | 401 | if (OptCoaLimit.getPosition()) |
145 | 1 | CoaLimitActive = true, CoaLimit = OptCoaLimit; |
146 | 401 | if (OptTfrLimit.getPosition()) |
147 | 0 | TfrLimitActive = true, TfrLimit = OptTfrLimit; |
148 | 401 | initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); |
149 | 401 | } |
150 | | |
151 | 401 | StringRef getPassName() const override { return "Hexagon Expand Condsets"; } |
152 | | |
153 | 401 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
154 | 401 | AU.addRequired<LiveIntervals>(); |
155 | 401 | AU.addPreserved<LiveIntervals>(); |
156 | 401 | AU.addPreserved<SlotIndexes>(); |
157 | 401 | AU.addRequired<MachineDominatorTree>(); |
158 | 401 | AU.addPreserved<MachineDominatorTree>(); |
159 | 401 | MachineFunctionPass::getAnalysisUsage(AU); |
160 | 401 | } |
161 | | |
162 | | bool runOnMachineFunction(MachineFunction &MF) override; |
163 | | |
164 | | private: |
165 | | const HexagonInstrInfo *HII = nullptr; |
166 | | const TargetRegisterInfo *TRI = nullptr; |
167 | | MachineDominatorTree *MDT; |
168 | | MachineRegisterInfo *MRI = nullptr; |
169 | | LiveIntervals *LIS = nullptr; |
170 | | bool CoaLimitActive = false; |
171 | | bool TfrLimitActive = false; |
172 | | unsigned CoaLimit; |
173 | | unsigned TfrLimit; |
174 | | unsigned CoaCounter = 0; |
175 | | unsigned TfrCounter = 0; |
176 | | |
177 | | struct RegisterRef { |
178 | | RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), |
179 | 2.30k | Sub(Op.getSubReg()) {} |
180 | 114 | RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} |
181 | | |
182 | 396 | bool operator== (RegisterRef RR) const { |
183 | 100 | return Reg == RR.Reg && Sub == RR.Sub; |
184 | 396 | } |
185 | 157 | bool operator!= (RegisterRef RR) const { return !operator==(RR); } |
186 | 681 | bool operator< (RegisterRef RR) const { |
187 | 513 | return Reg < RR.Reg || (Reg == RR.Reg && 513 Sub < RR.Sub507 ); |
188 | 681 | } |
189 | | |
190 | | unsigned Reg, Sub; |
191 | | }; |
192 | | |
193 | | using ReferenceMap = DenseMap<unsigned, unsigned>; |
194 | | enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; |
195 | | enum { Exec_Then = 0x10, Exec_Else = 0x20 }; |
196 | | |
197 | | unsigned getMaskForSub(unsigned Sub); |
198 | | bool isCondset(const MachineInstr &MI); |
199 | | LaneBitmask getLaneMask(unsigned Reg, unsigned Sub); |
200 | | |
201 | | void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); |
202 | | bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); |
203 | | |
204 | | void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range); |
205 | | void updateKillFlags(unsigned Reg); |
206 | | void updateDeadFlags(unsigned Reg); |
207 | | void recalculateLiveInterval(unsigned Reg); |
208 | | void removeInstr(MachineInstr &MI); |
209 | | void updateLiveness(std::set<unsigned> &RegSet, bool Recalc, |
210 | | bool UpdateKills, bool UpdateDeads); |
211 | | |
212 | | unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); |
213 | | MachineInstr *genCondTfrFor(MachineOperand &SrcOp, |
214 | | MachineBasicBlock::iterator At, unsigned DstR, |
215 | | unsigned DstSR, const MachineOperand &PredOp, bool PredSense, |
216 | | bool ReadUndef, bool ImpUse); |
217 | | bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs); |
218 | | |
219 | | bool isPredicable(MachineInstr *MI); |
220 | | MachineInstr *getReachingDefForPred(RegisterRef RD, |
221 | | MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); |
222 | | bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses); |
223 | | bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown); |
224 | | void predicateAt(const MachineOperand &DefOp, MachineInstr &MI, |
225 | | MachineBasicBlock::iterator Where, |
226 | | const MachineOperand &PredOp, bool Cond, |
227 | | std::set<unsigned> &UpdRegs); |
228 | | void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, |
229 | | bool Cond, MachineBasicBlock::iterator First, |
230 | | MachineBasicBlock::iterator Last); |
231 | | bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs); |
232 | | bool predicateInBlock(MachineBasicBlock &B, |
233 | | std::set<unsigned> &UpdRegs); |
234 | | |
235 | | bool isIntReg(RegisterRef RR, unsigned &BW); |
236 | | bool isIntraBlocks(LiveInterval &LI); |
237 | | bool coalesceRegisters(RegisterRef R1, RegisterRef R2); |
238 | | bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets, |
239 | | std::set<unsigned> &UpdRegs); |
240 | | }; |
241 | | |
242 | | } // end anonymous namespace |
243 | | |
244 | | char HexagonExpandCondsets::ID = 0; |
245 | | |
246 | | namespace llvm { |
247 | | |
248 | | char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID; |
249 | | |
250 | | } // end namespace llvm |
251 | | |
252 | 488 | INITIALIZE_PASS_BEGIN488 (HexagonExpandCondsets, "expand-condsets",
|
253 | 488 | "Hexagon Expand Condsets", false, false) |
254 | 488 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
255 | 488 | INITIALIZE_PASS_DEPENDENCY(SlotIndexes) |
256 | 488 | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) |
257 | 488 | INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets", |
258 | | "Hexagon Expand Condsets", false, false) |
259 | | |
260 | 91 | unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { |
261 | 91 | switch (Sub) { |
262 | 7 | case Hexagon::isub_lo: |
263 | 7 | case Hexagon::vsub_lo: |
264 | 7 | return Sub_Low; |
265 | 3 | case Hexagon::isub_hi: |
266 | 3 | case Hexagon::vsub_hi: |
267 | 3 | return Sub_High; |
268 | 81 | case Hexagon::NoSubRegister: |
269 | 81 | return Sub_None; |
270 | 0 | } |
271 | 0 | llvm_unreachable0 ("Invalid subregister"); |
272 | 0 | } |
273 | | |
274 | 13.3k | bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { |
275 | 13.3k | unsigned Opc = MI.getOpcode(); |
276 | 13.3k | switch (Opc) { |
277 | 107 | case Hexagon::C2_mux: |
278 | 107 | case Hexagon::C2_muxii: |
279 | 107 | case Hexagon::C2_muxir: |
280 | 107 | case Hexagon::C2_muxri: |
281 | 107 | case Hexagon::PS_pselect: |
282 | 107 | return true; |
283 | 0 | break; |
284 | 13.2k | } |
285 | 13.2k | return false; |
286 | 13.2k | } |
287 | | |
288 | 821 | LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { |
289 | 821 | assert(TargetRegisterInfo::isVirtualRegister(Reg)); |
290 | 33 | return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) |
291 | 788 | : MRI->getMaxLaneMaskForVReg(Reg); |
292 | 821 | } |
293 | | |
294 | | void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, |
295 | 77 | unsigned Exec) { |
296 | 77 | unsigned Mask = getMaskForSub(RR.Sub) | Exec; |
297 | 77 | ReferenceMap::iterator F = Map.find(RR.Reg); |
298 | 77 | if (F == Map.end()) |
299 | 69 | Map.insert(std::make_pair(RR.Reg, Mask)); |
300 | 77 | else |
301 | 8 | F->second |= Mask; |
302 | 77 | } |
303 | | |
304 | | bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, |
305 | 281 | unsigned Exec) { |
306 | 281 | ReferenceMap::iterator F = Map.find(RR.Reg); |
307 | 281 | if (F == Map.end()) |
308 | 267 | return false; |
309 | 14 | unsigned Mask = getMaskForSub(RR.Sub) | Exec; |
310 | 14 | if (Mask & F->second) |
311 | 14 | return true; |
312 | 0 | return false; |
313 | 0 | } |
314 | | |
315 | 468 | void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { |
316 | 458 | auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { |
317 | 458 | // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. |
318 | 458 | MachineInstr *MI = LIS->getInstructionFromIndex(K); |
319 | 1.15k | for (auto &Op : MI->operands()) { |
320 | 1.15k | if (!Op.isReg() || 1.15k !Op.isUse()1.14k || Op.getReg() != Reg700 ) |
321 | 699 | continue; |
322 | 458 | LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg()); |
323 | 458 | if ((SLM & LM) == SLM458 ) { |
324 | 458 | // Only set the kill flag on the first encountered use of Reg in this |
325 | 458 | // instruction. |
326 | 458 | Op.setIsKill(true); |
327 | 458 | break; |
328 | 458 | } |
329 | 458 | } |
330 | 458 | }; |
331 | 468 | |
332 | 468 | LiveInterval &LI = LIS->getInterval(Reg); |
333 | 1.15k | for (auto I = LI.begin(), E = LI.end(); I != E1.15k ; ++I691 ) { |
334 | 691 | if (!I->end.isRegister()) |
335 | 74 | continue; |
336 | 617 | // Do not mark the end of the segment as <kill>, if the next segment |
337 | 617 | // starts with a predicated instruction. |
338 | 617 | auto NextI = std::next(I); |
339 | 617 | if (NextI != E && 617 NextI->start.isRegister()241 ) { |
340 | 233 | MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start); |
341 | 233 | if (HII->isPredicated(*DefI)) |
342 | 120 | continue; |
343 | 497 | } |
344 | 497 | bool WholeReg = true; |
345 | 497 | if (LI.hasSubRanges()497 ) { |
346 | 78 | auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool { |
347 | 78 | LiveRange::iterator F = S.find(I->end); |
348 | 49 | return F != S.end() && I->end == F->end; |
349 | 78 | }; |
350 | 39 | // Check if all subranges end at I->end. If so, make sure to kill |
351 | 39 | // the whole register. |
352 | 78 | for (LiveInterval::SubRange &S : LI.subranges()) { |
353 | 78 | if (EndsAtI(S)) |
354 | 0 | KillAt(I->end, S.LaneMask); |
355 | 78 | else |
356 | 78 | WholeReg = false; |
357 | 78 | } |
358 | 39 | } |
359 | 497 | if (WholeReg) |
360 | 458 | KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg)); |
361 | 691 | } |
362 | 468 | } |
363 | | |
364 | | void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, |
365 | 285 | LiveRange &Range) { |
366 | 285 | assert(TargetRegisterInfo::isVirtualRegister(Reg)); |
367 | 285 | if (Range.empty()) |
368 | 59 | return; |
369 | 226 | |
370 | 226 | // Return two booleans: { def-modifes-reg, def-covers-reg }. |
371 | 226 | auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> 226 { |
372 | 1.06k | if (!Op.isReg() || 1.06k !Op.isDef()858 ) |
373 | 689 | return { false, false }; |
374 | 376 | unsigned DR = Op.getReg(), DSR = Op.getSubReg(); |
375 | 376 | if (!TargetRegisterInfo::isVirtualRegister(DR) || 376 DR != Reg365 ) |
376 | 13 | return { false, false }; |
377 | 363 | LaneBitmask SLM = getLaneMask(DR, DSR); |
378 | 363 | LaneBitmask A = SLM & LM; |
379 | 363 | return { A.any(), A == SLM }; |
380 | 363 | }; |
381 | 226 | |
382 | 226 | // The splitting step will create pairs of predicated definitions without |
383 | 226 | // any implicit uses (since implicit uses would interfere with predication). |
384 | 226 | // This can cause the reaching defs to become dead after live range |
385 | 226 | // recomputation, even though they are not really dead. |
386 | 226 | // We need to identify predicated defs that need implicit uses, and |
387 | 226 | // dead defs that are not really dead, and correct both problems. |
388 | 226 | |
389 | 226 | auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs, |
390 | 19 | MachineBasicBlock *Dest) -> bool { |
391 | 19 | for (MachineBasicBlock *D : Defs) |
392 | 21 | if (21 D != Dest && 21 MDT->dominates(D, Dest)6 ) |
393 | 4 | return true; |
394 | 15 | |
395 | 15 | MachineBasicBlock *Entry = &Dest->getParent()->front(); |
396 | 15 | SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end()); |
397 | 70 | for (unsigned i = 0; i < Work.size()70 ; ++i55 ) { |
398 | 69 | MachineBasicBlock *B = Work[i]; |
399 | 69 | if (Defs.count(B)) |
400 | 2 | continue; |
401 | 67 | if (67 B == Entry67 ) |
402 | 14 | return false; |
403 | 53 | for (auto *P : B->predecessors()) |
404 | 71 | Work.insert(P); |
405 | 69 | } |
406 | 1 | return true; |
407 | 19 | }; |
408 | 226 | |
409 | 226 | // First, try to extend live range within individual basic blocks. This |
410 | 226 | // will leave us only with dead defs that do not reach any predicated |
411 | 226 | // defs in the same block. |
412 | 226 | SetVector<MachineBasicBlock*> Defs; |
413 | 226 | SmallVector<SlotIndex,4> PredDefs; |
414 | 388 | for (auto &Seg : Range) { |
415 | 388 | if (!Seg.start.isRegister()) |
416 | 25 | continue; |
417 | 363 | MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); |
418 | 363 | Defs.insert(DefI->getParent()); |
419 | 363 | if (HII->isPredicated(*DefI)) |
420 | 183 | PredDefs.push_back(Seg.start); |
421 | 388 | } |
422 | 226 | |
423 | 226 | SmallVector<SlotIndex,8> Undefs; |
424 | 226 | LiveInterval &LI = LIS->getInterval(Reg); |
425 | 226 | LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes()); |
426 | 226 | |
427 | 183 | for (auto &SI : PredDefs) { |
428 | 183 | MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); |
429 | 183 | auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI); |
430 | 183 | if (P.first != nullptr || 183 P.second76 ) |
431 | 112 | SI = SlotIndex(); |
432 | 183 | } |
433 | 226 | |
434 | 226 | // Calculate reachability for those predicated defs that were not handled |
435 | 226 | // by the in-block extension. |
436 | 226 | SmallVector<SlotIndex,4> ExtTo; |
437 | 183 | for (auto &SI : PredDefs) { |
438 | 183 | if (!SI.isValid()) |
439 | 112 | continue; |
440 | 71 | MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); |
441 | 71 | if (BB->pred_empty()) |
442 | 52 | continue; |
443 | 19 | // If the defs from this range reach SI via all predecessors, it is live. |
444 | 19 | // It can happen that SI is reached by the defs through some paths, but |
445 | 19 | // not all. In the IR coming into this optimization, SI would not be |
446 | 19 | // considered live, since the defs would then not jointly dominate SI. |
447 | 19 | // That means that SI is an overwriting def, and no implicit use is |
448 | 19 | // needed at this point. Do not add SI to the extension points, since |
449 | 19 | // extendToIndices will abort if there is no joint dominance. |
450 | 19 | // If the abort was avoided by adding extra undefs added to Undefs, |
451 | 19 | // extendToIndices could actually indicate that SI is live, contrary |
452 | 19 | // to the original IR. |
453 | 19 | if (19 Dominate(Defs, BB)19 ) |
454 | 5 | ExtTo.push_back(SI); |
455 | 183 | } |
456 | 226 | |
457 | 226 | if (!ExtTo.empty()) |
458 | 3 | LIS->extendToIndices(Range, ExtTo, Undefs); |
459 | 226 | |
460 | 226 | // Remove <dead> flags from all defs that are not dead after live range |
461 | 226 | // extension, and collect all def operands. They will be used to generate |
462 | 226 | // the necessary implicit uses. |
463 | 226 | // At the same time, add <dead> flag to all defs that are actually dead. |
464 | 226 | // This can happen, for example, when a mux with identical inputs is |
465 | 226 | // replaced with a COPY: the use of the predicate register disappears and |
466 | 226 | // the dead can become dead. |
467 | 226 | std::set<RegisterRef> DefRegs; |
468 | 389 | for (auto &Seg : Range) { |
469 | 389 | if (!Seg.start.isRegister()) |
470 | 26 | continue; |
471 | 363 | MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); |
472 | 1.06k | for (auto &Op : DefI->operands()) { |
473 | 1.06k | auto P = IsRegDef(Op); |
474 | 1.06k | if (P.second && 1.06k Seg.end.isDead()349 ) { |
475 | 4 | Op.setIsDead(true); |
476 | 1.06k | } else if (1.06k P.first1.06k ) { |
477 | 359 | DefRegs.insert(Op); |
478 | 359 | Op.setIsDead(false); |
479 | 359 | } |
480 | 1.06k | } |
481 | 389 | } |
482 | 226 | |
483 | 226 | // Now, add implicit uses to each predicated def that is reached |
484 | 226 | // by other defs. |
485 | 389 | for (auto &Seg : Range) { |
486 | 389 | if (!Seg.start.isRegister() || 389 !Range.liveAt(Seg.start.getPrevSlot())363 ) |
487 | 269 | continue; |
488 | 120 | MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); |
489 | 120 | if (!HII->isPredicated(*DefI)) |
490 | 10 | continue; |
491 | 110 | // Construct the set of all necessary implicit uses, based on the def |
492 | 110 | // operands in the instruction. We need to tie the implicit uses to |
493 | 110 | // the corresponding defs. |
494 | 110 | std::map<RegisterRef,unsigned> ImpUses; |
495 | 456 | for (unsigned i = 0, e = DefI->getNumOperands(); i != e456 ; ++i346 ) { |
496 | 346 | MachineOperand &Op = DefI->getOperand(i); |
497 | 346 | if (!Op.isReg() || 346 !DefRegs.count(Op)279 ) |
498 | 231 | continue; |
499 | 115 | if (115 Op.isDef()115 ) { |
500 | 109 | ImpUses.insert({Op, i}); |
501 | 115 | } else { |
502 | 6 | // This function can be called for the same register with different |
503 | 6 | // lane masks. If the def in this instruction was for the whole |
504 | 6 | // register, we can get here more than once. Avoid adding multiple |
505 | 6 | // implicit uses (or adding an implicit use when an explicit one is |
506 | 6 | // present). |
507 | 6 | ImpUses.erase(Op); |
508 | 6 | } |
509 | 346 | } |
510 | 110 | if (ImpUses.empty()) |
511 | 7 | continue; |
512 | 103 | MachineFunction &MF = *DefI->getParent()->getParent(); |
513 | 103 | for (std::pair<RegisterRef, unsigned> P : ImpUses) { |
514 | 103 | RegisterRef R = P.first; |
515 | 103 | MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); |
516 | 103 | DefI->tieOperands(P.second, DefI->getNumOperands()-1); |
517 | 103 | } |
518 | 389 | } |
519 | 285 | } |
520 | | |
521 | 275 | void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { |
522 | 275 | LiveInterval &LI = LIS->getInterval(Reg); |
523 | 275 | if (LI.hasSubRanges()275 ) { |
524 | 20 | for (LiveInterval::SubRange &S : LI.subranges()) { |
525 | 20 | updateDeadsInRange(Reg, S.LaneMask, S); |
526 | 20 | LIS->shrinkToUses(S, Reg); |
527 | 20 | } |
528 | 10 | LI.clear(); |
529 | 10 | LIS->constructMainRangeFromSubranges(LI); |
530 | 275 | } else { |
531 | 265 | updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI); |
532 | 265 | } |
533 | 275 | } |
534 | | |
535 | 275 | void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) { |
536 | 275 | LIS->removeInterval(Reg); |
537 | 275 | LIS->createAndComputeVirtRegInterval(Reg); |
538 | 275 | } |
539 | | |
540 | 203 | void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { |
541 | 203 | LIS->RemoveMachineInstrFromMaps(MI); |
542 | 203 | MI.eraseFromParent(); |
543 | 203 | } |
544 | | |
545 | | void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, |
546 | 1.72k | bool Recalc, bool UpdateKills, bool UpdateDeads) { |
547 | 1.72k | UpdateKills |= UpdateDeads; |
548 | 441 | for (auto R : RegSet) { |
549 | 441 | if (Recalc) |
550 | 275 | recalculateLiveInterval(R); |
551 | 441 | if (UpdateKills) |
552 | 441 | MRI->clearKillFlags(R); |
553 | 441 | if (UpdateDeads) |
554 | 275 | updateDeadFlags(R); |
555 | 441 | // Fixing <dead> flags may extend live ranges, so reset <kill> flags |
556 | 441 | // after that. |
557 | 441 | if (UpdateKills) |
558 | 441 | updateKillFlags(R); |
559 | 441 | LIS->getInterval(R).verify(); |
560 | 441 | } |
561 | 1.72k | } |
562 | | |
563 | | /// Get the opcode for a conditional transfer of the value in SO (source |
564 | | /// operand). The condition (true/false) is given in Cond. |
565 | | unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, |
566 | 212 | bool IfTrue) { |
567 | 212 | using namespace Hexagon; |
568 | 212 | |
569 | 212 | if (SO.isReg()212 ) { |
570 | 114 | unsigned PhysR; |
571 | 114 | RegisterRef RS = SO; |
572 | 114 | if (TargetRegisterInfo::isVirtualRegister(RS.Reg)114 ) { |
573 | 114 | const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); |
574 | 114 | assert(VC->begin() != VC->end() && "Empty register class"); |
575 | 114 | PhysR = *VC->begin(); |
576 | 114 | } else { |
577 | 0 | assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); |
578 | 0 | PhysR = RS.Reg; |
579 | 0 | } |
580 | 114 | unsigned PhysS = (RS.Sub == 0) ? PhysR100 : TRI->getSubReg(PhysR, RS.Sub)14 ; |
581 | 114 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); |
582 | 114 | switch (TRI->getRegSizeInBits(*RC)) { |
583 | 110 | case 32: |
584 | 110 | return IfTrue ? A2_tfrt61 : A2_tfrf49 ; |
585 | 4 | case 64: |
586 | 4 | return IfTrue ? A2_tfrpt2 : A2_tfrpf2 ; |
587 | 0 | } |
588 | 0 | llvm_unreachable0 ("Invalid register operand"); |
589 | 0 | } |
590 | 98 | switch (SO.getType()) { |
591 | 98 | case MachineOperand::MO_Immediate: |
592 | 98 | case MachineOperand::MO_FPImmediate: |
593 | 98 | case MachineOperand::MO_ConstantPoolIndex: |
594 | 98 | case MachineOperand::MO_TargetIndex: |
595 | 98 | case MachineOperand::MO_JumpTableIndex: |
596 | 98 | case MachineOperand::MO_ExternalSymbol: |
597 | 98 | case MachineOperand::MO_GlobalAddress: |
598 | 98 | case MachineOperand::MO_BlockAddress: |
599 | 98 | return IfTrue ? C2_cmoveit43 : C2_cmoveif55 ; |
600 | 0 | default: |
601 | 0 | break; |
602 | 0 | } |
603 | 0 | llvm_unreachable0 ("Unexpected source operand"); |
604 | 0 | } |
605 | | |
606 | | /// Generate a conditional transfer, copying the value SrcOp to the |
607 | | /// destination register DstR:DstSR, and using the predicate register from |
608 | | /// PredOp. The Cond argument specifies whether the predicate is to be |
609 | | /// if(PredOp), or if(!PredOp). |
610 | | MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, |
611 | | MachineBasicBlock::iterator At, |
612 | | unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, |
613 | 212 | bool PredSense, bool ReadUndef, bool ImpUse) { |
614 | 212 | MachineInstr *MI = SrcOp.getParent(); |
615 | 212 | MachineBasicBlock &B = *At->getParent(); |
616 | 212 | const DebugLoc &DL = MI->getDebugLoc(); |
617 | 212 | |
618 | 212 | // Don't avoid identity copies here (i.e. if the source and the destination |
619 | 212 | // are the same registers). It is actually better to generate them here, |
620 | 212 | // since this would cause the copy to potentially be predicated in the next |
621 | 212 | // step. The predication will remove such a copy if it is unable to |
622 | 212 | /// predicate. |
623 | 212 | |
624 | 212 | unsigned Opc = getCondTfrOpcode(SrcOp, PredSense); |
625 | 212 | unsigned DstState = RegState::Define | (ReadUndef ? RegState::Undef4 : 0208 ); |
626 | 212 | unsigned PredState = getRegState(PredOp) & ~RegState::Kill; |
627 | 212 | MachineInstrBuilder MIB; |
628 | 212 | |
629 | 212 | if (SrcOp.isReg()212 ) { |
630 | 114 | unsigned SrcState = getRegState(SrcOp); |
631 | 114 | if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR)) |
632 | 33 | SrcState &= ~RegState::Kill; |
633 | 114 | MIB = BuildMI(B, At, DL, HII->get(Opc)) |
634 | 114 | .addReg(DstR, DstState, DstSR) |
635 | 114 | .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) |
636 | 114 | .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg()); |
637 | 212 | } else { |
638 | 98 | MIB = BuildMI(B, At, DL, HII->get(Opc)) |
639 | 98 | .addReg(DstR, DstState, DstSR) |
640 | 98 | .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) |
641 | 98 | .add(SrcOp); |
642 | 98 | } |
643 | 212 | |
644 | 212 | DEBUG(dbgs() << "created an initial copy: " << *MIB); |
645 | 212 | return &*MIB; |
646 | 212 | } |
647 | | |
648 | | /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function |
649 | | /// performs all necessary changes to complete the replacement. |
650 | | bool HexagonExpandCondsets::split(MachineInstr &MI, |
651 | 107 | std::set<unsigned> &UpdRegs) { |
652 | 107 | if (TfrLimitActive107 ) { |
653 | 0 | if (TfrCounter >= TfrLimit) |
654 | 0 | return false; |
655 | 0 | TfrCounter++; |
656 | 0 | } |
657 | 107 | DEBUG107 (dbgs() << "\nsplitting BB#" << MI.getParent()->getNumber() << ": " |
658 | 107 | << MI); |
659 | 107 | MachineOperand &MD = MI.getOperand(0); // Definition |
660 | 107 | MachineOperand &MP = MI.getOperand(1); // Predicate register |
661 | 107 | assert(MD.isDef()); |
662 | 107 | unsigned DR = MD.getReg(), DSR = MD.getSubReg(); |
663 | 107 | bool ReadUndef = MD.isUndef(); |
664 | 107 | MachineBasicBlock::iterator At = MI; |
665 | 107 | |
666 | 107 | auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void { |
667 | 107 | for (auto &Op : MI.operands()) |
668 | 428 | if (428 Op.isReg()428 ) |
669 | 330 | UpdRegs.insert(Op.getReg()); |
670 | 107 | }; |
671 | 107 | |
672 | 107 | // If this is a mux of the same register, just replace it with COPY. |
673 | 107 | // Ideally, this would happen earlier, so that register coalescing would |
674 | 107 | // see it. |
675 | 107 | MachineOperand &ST = MI.getOperand(2); |
676 | 107 | MachineOperand &SF = MI.getOperand(3); |
677 | 107 | if (ST.isReg() && 107 SF.isReg()64 ) { |
678 | 48 | RegisterRef RT(ST); |
679 | 48 | if (RT == RegisterRef(SF)48 ) { |
680 | 1 | // Copy regs to update first. |
681 | 1 | updateRegs(MI); |
682 | 1 | MI.setDesc(HII->get(TargetOpcode::COPY)); |
683 | 1 | unsigned S = getRegState(ST); |
684 | 4 | while (MI.getNumOperands() > 1) |
685 | 3 | MI.RemoveOperand(MI.getNumOperands()-1); |
686 | 1 | MachineFunction &MF = *MI.getParent()->getParent(); |
687 | 1 | MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub); |
688 | 1 | return true; |
689 | 1 | } |
690 | 106 | } |
691 | 106 | |
692 | 106 | // First, create the two invididual conditional transfers, and add each |
693 | 106 | // of them to the live intervals information. Do that first and then remove |
694 | 106 | // the old instruction from live intervals. |
695 | 106 | MachineInstr *TfrT = |
696 | 106 | genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false); |
697 | 106 | MachineInstr *TfrF = |
698 | 106 | genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true); |
699 | 106 | LIS->InsertMachineInstrInMaps(*TfrT); |
700 | 106 | LIS->InsertMachineInstrInMaps(*TfrF); |
701 | 106 | |
702 | 106 | // Will need to recalculate live intervals for all registers in MI. |
703 | 106 | updateRegs(MI); |
704 | 106 | |
705 | 106 | removeInstr(MI); |
706 | 106 | return true; |
707 | 106 | } |
708 | | |
709 | 50 | bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { |
710 | 50 | if (HII->isPredicated(*MI) || 50 !HII->isPredicable(*MI)43 ) |
711 | 13 | return false; |
712 | 37 | if (37 MI->hasUnmodeledSideEffects() || 37 MI->mayStore()37 ) |
713 | 0 | return false; |
714 | 37 | // Reject instructions with multiple defs (e.g. post-increment loads). |
715 | 37 | bool HasDef = false; |
716 | 97 | for (auto &Op : MI->operands()) { |
717 | 97 | if (!Op.isReg() || 97 !Op.isDef()73 ) |
718 | 60 | continue; |
719 | 37 | if (37 HasDef37 ) |
720 | 0 | return false; |
721 | 37 | HasDef = true; |
722 | 37 | } |
723 | 37 | for (auto &Mo : MI->memoperands()) |
724 | 2 | if (2 Mo->isVolatile()2 ) |
725 | 0 | return false; |
726 | 37 | return true; |
727 | 37 | } |
728 | | |
729 | | /// Find the reaching definition for a predicated use of RD. The RD is used |
730 | | /// under the conditions given by PredR and Cond, and this function will ignore |
731 | | /// definitions that set RD under the opposite conditions. |
732 | | MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, |
733 | 157 | MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { |
734 | 157 | MachineBasicBlock &B = *UseIt->getParent(); |
735 | 157 | MachineBasicBlock::iterator I = UseIt, S = B.begin(); |
736 | 157 | if (I == S) |
737 | 6 | return nullptr; |
738 | 151 | |
739 | 151 | bool PredValid = true; |
740 | 349 | do { |
741 | 349 | --I; |
742 | 349 | MachineInstr *MI = &*I; |
743 | 349 | // Check if this instruction can be ignored, i.e. if it is predicated |
744 | 349 | // on the complementary condition. |
745 | 349 | if (PredValid && 349 HII->isPredicated(*MI)259 ) { |
746 | 45 | if (MI->readsRegister(PredR) && 45 (Cond != HII->isPredicatedTrue(*MI))28 ) |
747 | 21 | continue; |
748 | 328 | } |
749 | 328 | |
750 | 328 | // Check the defs. If the PredR is defined, invalidate it. If RD is |
751 | 328 | // defined, return the instruction or 0, depending on the circumstances. |
752 | 328 | for (auto &Op : MI->operands()) 328 { |
753 | 689 | if (!Op.isReg() || 689 !Op.isDef()594 ) |
754 | 351 | continue; |
755 | 338 | RegisterRef RR = Op; |
756 | 338 | if (RR.Reg == PredR338 ) { |
757 | 39 | PredValid = false; |
758 | 39 | continue; |
759 | 39 | } |
760 | 299 | if (299 RR.Reg != RD.Reg299 ) |
761 | 163 | continue; |
762 | 136 | // If the "Reg" part agrees, there is still the subregister to check. |
763 | 136 | // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but |
764 | 136 | // not vreg1 (w/o subregisters). |
765 | 136 | if (136 RR.Sub == RD.Sub136 ) |
766 | 131 | return MI; |
767 | 5 | if (5 RR.Sub == 0 || 5 RD.Sub == 02 ) |
768 | 3 | return nullptr; |
769 | 215 | // We have different subregisters, so we can continue looking. |
770 | 215 | } |
771 | 215 | } while (I != S); |
772 | 151 | |
773 | 17 | return nullptr; |
774 | 157 | } |
775 | | |
776 | | /// Check if the instruction MI can be safely moved over a set of instructions |
777 | | /// whose side-effects (in terms of register defs and uses) are expressed in |
778 | | /// the maps Defs and Uses. These maps reflect the conditional defs and uses |
779 | | /// that depend on the same predicate register to allow moving instructions |
780 | | /// over instructions predicated on the opposite condition. |
781 | | bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, |
782 | 68 | ReferenceMap &Uses) { |
783 | 68 | // In order to be able to safely move MI over instructions that define |
784 | 68 | // "Defs" and use "Uses", no def operand from MI can be defined or used |
785 | 68 | // and no use operand can be defined. |
786 | 169 | for (auto &Op : MI.operands()) { |
787 | 169 | if (!Op.isReg()) |
788 | 20 | continue; |
789 | 149 | RegisterRef RR = Op; |
790 | 149 | // For physical register we would need to check register aliases, etc. |
791 | 149 | // and we don't want to bother with that. It would be of little value |
792 | 149 | // before the actual register rewriting (from virtual to physical). |
793 | 149 | if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) |
794 | 0 | return false; |
795 | 149 | // No redefs for any operand. |
796 | 149 | if (149 isRefInMap(RR, Defs, Exec_Then)149 ) |
797 | 11 | return false; |
798 | 138 | // For defs, there cannot be uses. |
799 | 138 | if (138 Op.isDef() && 138 isRefInMap(RR, Uses, Exec_Then)58 ) |
800 | 0 | return false; |
801 | 57 | } |
802 | 57 | return true; |
803 | 57 | } |
804 | | |
805 | | /// Check if the instruction accessing memory (TheI) can be moved to the |
806 | | /// location ToI. |
807 | | bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, |
808 | 1 | bool IsDown) { |
809 | 1 | bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore(); |
810 | 1 | if (!IsLoad && 1 !IsStore0 ) |
811 | 0 | return true; |
812 | 1 | if (1 HII->areMemAccessesTriviallyDisjoint(TheI, ToI)1 ) |
813 | 0 | return true; |
814 | 1 | if (1 TheI.hasUnmodeledSideEffects()1 ) |
815 | 0 | return false; |
816 | 1 | |
817 | 1 | MachineBasicBlock::iterator StartI = IsDown ? 1 TheI1 : ToI0 ; |
818 | 1 | MachineBasicBlock::iterator EndI = IsDown ? ToI1 : TheI0 ; |
819 | 1 | bool Ordered = TheI.hasOrderedMemoryRef(); |
820 | 1 | |
821 | 1 | // Search for aliased memory reference in (StartI, EndI). |
822 | 1 | for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI1 ; ++I0 ) { |
823 | 0 | MachineInstr *MI = &*I; |
824 | 0 | if (MI->hasUnmodeledSideEffects()) |
825 | 0 | return false; |
826 | 0 | bool L = MI->mayLoad(), S = MI->mayStore(); |
827 | 0 | if (!L && 0 !S0 ) |
828 | 0 | continue; |
829 | 0 | if (0 Ordered && 0 MI->hasOrderedMemoryRef()0 ) |
830 | 0 | return false; |
831 | 0 |
|
832 | 0 | bool Conflict = (L && 0 IsStore0 ) || S0 ; |
833 | 0 | if (Conflict) |
834 | 0 | return false; |
835 | 0 | } |
836 | 1 | return true; |
837 | 1 | } |
838 | | |
839 | | /// Generate a predicated version of MI (where the condition is given via |
840 | | /// PredR and Cond) at the point indicated by Where. |
841 | | void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, |
842 | | MachineInstr &MI, |
843 | | MachineBasicBlock::iterator Where, |
844 | | const MachineOperand &PredOp, bool Cond, |
845 | 33 | std::set<unsigned> &UpdRegs) { |
846 | 33 | // The problem with updating live intervals is that we can move one def |
847 | 33 | // past another def. In particular, this can happen when moving an A2_tfrt |
848 | 33 | // over an A2_tfrf defining the same register. From the point of view of |
849 | 33 | // live intervals, these two instructions are two separate definitions, |
850 | 33 | // and each one starts another live segment. LiveIntervals's "handleMove" |
851 | 33 | // does not allow such moves, so we need to handle it ourselves. To avoid |
852 | 33 | // invalidating liveness data while we are using it, the move will be |
853 | 33 | // implemented in 4 steps: (1) add a clone of the instruction MI at the |
854 | 33 | // target location, (2) update liveness, (3) delete the old instruction, |
855 | 33 | // and (4) update liveness again. |
856 | 33 | |
857 | 33 | MachineBasicBlock &B = *MI.getParent(); |
858 | 33 | DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. |
859 | 33 | unsigned Opc = MI.getOpcode(); |
860 | 33 | unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); |
861 | 33 | MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); |
862 | 33 | unsigned Ox = 0, NP = MI.getNumOperands(); |
863 | 33 | // Skip all defs from MI first. |
864 | 66 | while (Ox < NP66 ) { |
865 | 66 | MachineOperand &MO = MI.getOperand(Ox); |
866 | 66 | if (!MO.isReg() || 66 !MO.isDef()56 ) |
867 | 33 | break; |
868 | 33 | Ox++; |
869 | 33 | } |
870 | 33 | // Add the new def, then the predicate register, then the rest of the |
871 | 33 | // operands. |
872 | 33 | MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg()); |
873 | 33 | MB.addReg(PredOp.getReg(), PredOp.isUndef() ? RegState::Undef2 : 031 , |
874 | 33 | PredOp.getSubReg()); |
875 | 85 | while (Ox < NP85 ) { |
876 | 52 | MachineOperand &MO = MI.getOperand(Ox); |
877 | 52 | if (!MO.isReg() || 52 !MO.isImplicit()32 ) |
878 | 52 | MB.add(MO); |
879 | 52 | Ox++; |
880 | 52 | } |
881 | 33 | |
882 | 33 | MachineFunction &MF = *B.getParent(); |
883 | 33 | MachineInstr::mmo_iterator I = MI.memoperands_begin(); |
884 | 33 | unsigned NR = std::distance(I, MI.memoperands_end()); |
885 | 33 | MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); |
886 | 34 | for (unsigned i = 0; i < NR34 ; ++i1 ) |
887 | 1 | MemRefs[i] = *I++; |
888 | 33 | MB.setMemRefs(MemRefs, MemRefs+NR); |
889 | 33 | |
890 | 33 | MachineInstr *NewI = MB; |
891 | 33 | NewI->clearKillInfo(); |
892 | 33 | LIS->InsertMachineInstrInMaps(*NewI); |
893 | 33 | |
894 | 33 | for (auto &Op : NewI->operands()) |
895 | 118 | if (118 Op.isReg()118 ) |
896 | 98 | UpdRegs.insert(Op.getReg()); |
897 | 33 | } |
898 | | |
899 | | /// In the range [First, Last], rename all references to the "old" register RO |
900 | | /// to the "new" register RN, but only in instructions predicated on the given |
901 | | /// condition. |
902 | | void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, |
903 | | unsigned PredR, bool Cond, MachineBasicBlock::iterator First, |
904 | 33 | MachineBasicBlock::iterator Last) { |
905 | 33 | MachineBasicBlock::iterator End = std::next(Last); |
906 | 97 | for (MachineBasicBlock::iterator I = First; I != End97 ; ++I64 ) { |
907 | 64 | MachineInstr *MI = &*I; |
908 | 64 | // Do not touch instructions that are not predicated, or are predicated |
909 | 64 | // on the opposite condition. |
910 | 64 | if (!HII->isPredicated(*MI)) |
911 | 14 | continue; |
912 | 50 | if (50 !MI->readsRegister(PredR) || 50 (Cond != HII->isPredicatedTrue(*MI))50 ) |
913 | 8 | continue; |
914 | 42 | |
915 | 42 | for (auto &Op : MI->operands()) 42 { |
916 | 130 | if (!Op.isReg() || 130 RO != RegisterRef(Op)124 ) |
917 | 97 | continue; |
918 | 33 | Op.setReg(RN.Reg); |
919 | 33 | Op.setSubReg(RN.Sub); |
920 | 33 | // In practice, this isn't supposed to see any defs. |
921 | 33 | assert(!Op.isDef() && "Not expecting a def"); |
922 | 33 | } |
923 | 64 | } |
924 | 33 | } |
925 | | |
926 | | /// For a given conditional copy, predicate the definition of the source of |
927 | | /// the copy under the given condition (using the same predicate register as |
928 | | /// the copy). |
929 | | bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, |
930 | 110 | std::set<unsigned> &UpdRegs) { |
931 | 110 | // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). |
932 | 110 | unsigned Opc = TfrI.getOpcode(); |
933 | 110 | (void)Opc; |
934 | 110 | assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); |
935 | 110 | DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") |
936 | 110 | << ": " << TfrI); |
937 | 110 | |
938 | 110 | MachineOperand &MD = TfrI.getOperand(0); |
939 | 110 | MachineOperand &MP = TfrI.getOperand(1); |
940 | 110 | MachineOperand &MS = TfrI.getOperand(2); |
941 | 110 | // The source operand should be a <kill>. This is not strictly necessary, |
942 | 110 | // but it makes things a lot simpler. Otherwise, we would need to rename |
943 | 110 | // some registers, which would complicate the transformation considerably. |
944 | 110 | if (!MS.isKill()) |
945 | 57 | return false; |
946 | 53 | // Avoid predicating instructions that define a subregister if subregister |
947 | 53 | // liveness tracking is not enabled. |
948 | 53 | if (53 MD.getSubReg() && 53 !MRI->shouldTrackSubRegLiveness(MD.getReg())9 ) |
949 | 0 | return false; |
950 | 53 | |
951 | 53 | RegisterRef RT(MS); |
952 | 53 | unsigned PredR = MP.getReg(); |
953 | 53 | MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); |
954 | 53 | if (!DefI || 53 !isPredicable(DefI)50 ) |
955 | 16 | return false; |
956 | 37 | |
957 | 37 | DEBUG37 (dbgs() << "Source def: " << *DefI); |
958 | 37 | |
959 | 37 | // Collect the information about registers defined and used between the |
960 | 37 | // DefI and the TfrI. |
961 | 37 | // Map: reg -> bitmask of subregs |
962 | 37 | ReferenceMap Uses, Defs; |
963 | 37 | MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; |
964 | 37 | |
965 | 37 | // Check if the predicate register is valid between DefI and TfrI. |
966 | 37 | // If it is, we can then ignore instructions predicated on the negated |
967 | 37 | // conditions when collecting def and use information. |
968 | 37 | bool PredValid = true; |
969 | 59 | for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt59 ; ++I22 ) { |
970 | 26 | if (!I->modifiesRegister(PredR, nullptr)) |
971 | 22 | continue; |
972 | 4 | PredValid = false; |
973 | 4 | break; |
974 | 4 | } |
975 | 37 | |
976 | 69 | for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt69 ; ++I32 ) { |
977 | 32 | MachineInstr *MI = &*I; |
978 | 32 | // If this instruction is predicated on the same register, it could |
979 | 32 | // potentially be ignored. |
980 | 32 | // By default assume that the instruction executes on the same condition |
981 | 32 | // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). |
982 | 32 | unsigned Exec = Exec_Then | Exec_Else; |
983 | 32 | if (PredValid && 32 HII->isPredicated(*MI)21 && MI->readsRegister(PredR)8 ) |
984 | 8 | Exec = (Cond == HII->isPredicatedTrue(*MI)) ? 8 Exec_Then0 : Exec_Else8 ; |
985 | 32 | |
986 | 95 | for (auto &Op : MI->operands()) { |
987 | 95 | if (!Op.isReg()) |
988 | 18 | continue; |
989 | 77 | // We don't want to deal with physical registers. The reason is that |
990 | 77 | // they can be aliased with other physical registers. Aliased virtual |
991 | 77 | // registers must share the same register number, and can only differ |
992 | 77 | // in the subregisters, which we are keeping track of. Physical |
993 | 77 | // registers ters no longer have subregisters---their super- and |
994 | 77 | // subregisters are other physical registers, and we are not checking |
995 | 77 | // that. |
996 | 77 | RegisterRef RR = Op; |
997 | 77 | if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) |
998 | 0 | return false; |
999 | 77 | |
1000 | 77 | ReferenceMap &Map = Op.isDef() ? 77 Defs31 : Uses46 ; |
1001 | 77 | if (Op.isDef() && 77 Op.isUndef()31 ) { |
1002 | 5 | assert(RR.Sub && "Expecting a subregister on <def,read-undef>"); |
1003 | 5 | // If this is a <def,read-undef>, then it invalidates the non-written |
1004 | 5 | // part of the register. For the purpose of checking the validity of |
1005 | 5 | // the move, assume that it modifies the whole register. |
1006 | 5 | RR.Sub = 0; |
1007 | 5 | } |
1008 | 95 | addRefToMap(RR, Map, Exec); |
1009 | 95 | } |
1010 | 32 | } |
1011 | 37 | |
1012 | 37 | // The situation: |
1013 | 37 | // RT = DefI |
1014 | 37 | // ... |
1015 | 37 | // RD = TfrI ..., RT |
1016 | 37 | |
1017 | 37 | // If the register-in-the-middle (RT) is used or redefined between |
1018 | 37 | // DefI and TfrI, we may not be able proceed with this transformation. |
1019 | 37 | // We can ignore a def that will not execute together with TfrI, and a |
1020 | 37 | // use that will. If there is such a use (that does execute together with |
1021 | 37 | // TfrI), we will not be able to move DefI down. If there is a use that |
1022 | 37 | // executed if TfrI's condition is false, then RT must be available |
1023 | 37 | // unconditionally (cannot be predicated). |
1024 | 37 | // Essentially, we need to be able to rename RT to RD in this segment. |
1025 | 37 | if (37 isRefInMap(RT, Defs, Exec_Then) || 37 isRefInMap(RT, Uses, Exec_Else)37 ) |
1026 | 3 | return false; |
1027 | 34 | RegisterRef RD = MD; |
1028 | 34 | // If the predicate register is defined between DefI and TfrI, the only |
1029 | 34 | // potential thing to do would be to move the DefI down to TfrI, and then |
1030 | 34 | // predicate. The reaching def (DefI) must be movable down to the location |
1031 | 34 | // of the TfrI. |
1032 | 34 | // If the target register of the TfrI (RD) is not used or defined between |
1033 | 34 | // DefI and TfrI, consider moving TfrI up to DefI. |
1034 | 34 | bool CanUp = canMoveOver(TfrI, Defs, Uses); |
1035 | 34 | bool CanDown = canMoveOver(*DefI, Defs, Uses); |
1036 | 34 | // The TfrI does not access memory, but DefI could. Check if it's safe |
1037 | 34 | // to move DefI down to TfrI. |
1038 | 34 | if (DefI->mayLoad() || 34 DefI->mayStore()33 ) |
1039 | 1 | if (1 !canMoveMemTo(*DefI, TfrI, true)1 ) |
1040 | 0 | CanDown = false; |
1041 | 34 | |
1042 | 34 | DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") |
1043 | 34 | << ", can move down: " << (CanDown ? "yes\n" : "no\n")); |
1044 | 34 | MachineBasicBlock::iterator PastDefIt = std::next(DefIt); |
1045 | 34 | if (CanUp) |
1046 | 24 | predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs); |
1047 | 10 | else if (10 CanDown10 ) |
1048 | 9 | predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs); |
1049 | 10 | else |
1050 | 1 | return false; |
1051 | 33 | |
1052 | 33 | if (33 RT != RD33 ) { |
1053 | 33 | renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); |
1054 | 33 | UpdRegs.insert(RT.Reg); |
1055 | 33 | } |
1056 | 110 | |
1057 | 110 | removeInstr(TfrI); |
1058 | 110 | removeInstr(*DefI); |
1059 | 110 | return true; |
1060 | 110 | } |
1061 | | |
1062 | | /// Predicate all cases of conditional copies in the specified block. |
1063 | | bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, |
1064 | 2.00k | std::set<unsigned> &UpdRegs) { |
1065 | 2.00k | bool Changed = false; |
1066 | 2.00k | MachineBasicBlock::iterator I, E, NextI; |
1067 | 15.4k | for (I = B.begin(), E = B.end(); I != E15.4k ; I = NextI13.4k ) { |
1068 | 13.4k | NextI = std::next(I); |
1069 | 13.4k | unsigned Opc = I->getOpcode(); |
1070 | 13.4k | if (Opc == Hexagon::A2_tfrt || 13.4k Opc == Hexagon::A2_tfrf13.3k ) { |
1071 | 110 | bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); |
1072 | 110 | if (!Done110 ) { |
1073 | 77 | // If we didn't predicate I, we may need to remove it in case it is |
1074 | 77 | // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. |
1075 | 77 | if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))77 ) { |
1076 | 31 | for (auto &Op : I->operands()) |
1077 | 93 | if (93 Op.isReg()93 ) |
1078 | 93 | UpdRegs.insert(Op.getReg()); |
1079 | 31 | removeInstr(*I); |
1080 | 31 | } |
1081 | 77 | } |
1082 | 110 | Changed |= Done; |
1083 | 110 | } |
1084 | 13.4k | } |
1085 | 2.00k | return Changed; |
1086 | 2.00k | } |
1087 | | |
1088 | 124 | bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { |
1089 | 124 | if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) |
1090 | 0 | return false; |
1091 | 124 | const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); |
1092 | 124 | if (RC == &Hexagon::IntRegsRegClass124 ) { |
1093 | 92 | BW = 32; |
1094 | 92 | return true; |
1095 | 92 | } |
1096 | 32 | if (32 RC == &Hexagon::DoubleRegsRegClass32 ) { |
1097 | 32 | BW = (RR.Sub != 0) ? 3224 : 648 ; |
1098 | 32 | return true; |
1099 | 32 | } |
1100 | 0 | return false; |
1101 | 0 | } |
1102 | | |
1103 | 32 | bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { |
1104 | 66 | for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E66 ; ++I34 ) { |
1105 | 39 | LiveRange::Segment &LR = *I; |
1106 | 39 | // Range must start at a register... |
1107 | 39 | if (!LR.start.isRegister()) |
1108 | 1 | return false; |
1109 | 38 | // ...and end in a register or in a dead slot. |
1110 | 38 | if (38 !LR.end.isRegister() && 38 !LR.end.isDead()4 ) |
1111 | 4 | return false; |
1112 | 39 | } |
1113 | 27 | return true; |
1114 | 32 | } |
1115 | | |
1116 | 62 | bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { |
1117 | 62 | if (CoaLimitActive62 ) { |
1118 | 0 | if (CoaCounter >= CoaLimit) |
1119 | 0 | return false; |
1120 | 0 | CoaCounter++; |
1121 | 0 | } |
1122 | 62 | unsigned BW1, BW2; |
1123 | 62 | if (!isIntReg(R1, BW1) || 62 !isIntReg(R2, BW2)62 || BW1 != BW262 ) |
1124 | 0 | return false; |
1125 | 62 | if (62 MRI->isLiveIn(R1.Reg)62 ) |
1126 | 2 | return false; |
1127 | 60 | if (60 MRI->isLiveIn(R2.Reg)60 ) |
1128 | 8 | return false; |
1129 | 52 | |
1130 | 52 | LiveInterval &L1 = LIS->getInterval(R1.Reg); |
1131 | 52 | LiveInterval &L2 = LIS->getInterval(R2.Reg); |
1132 | 52 | if (L2.empty()) |
1133 | 6 | return false; |
1134 | 46 | if (46 L1.hasSubRanges() || 46 L2.hasSubRanges()44 ) |
1135 | 6 | return false; |
1136 | 40 | bool Overlap = L1.overlaps(L2); |
1137 | 40 | |
1138 | 40 | DEBUG(dbgs() << "compatible registers: (" |
1139 | 40 | << (Overlap ? "overlap" : "disjoint") << ")\n " |
1140 | 40 | << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " |
1141 | 40 | << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); |
1142 | 40 | if (R1.Sub || 40 R2.Sub40 ) |
1143 | 0 | return false; |
1144 | 40 | if (40 Overlap40 ) |
1145 | 12 | return false; |
1146 | 28 | |
1147 | 28 | // Coalescing could have a negative impact on scheduling, so try to limit |
1148 | 28 | // to some reasonable extent. Only consider coalescing segments, when one |
1149 | 28 | // of them does not cross basic block boundaries. |
1150 | 28 | if (28 !isIntraBlocks(L1) && 28 !isIntraBlocks(L2)4 ) |
1151 | 1 | return false; |
1152 | 27 | |
1153 | 27 | MRI->replaceRegWith(R2.Reg, R1.Reg); |
1154 | 27 | |
1155 | 27 | // Move all live segments from L2 to L1. |
1156 | 27 | using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>; |
1157 | 27 | ValueInfoMap VM; |
1158 | 109 | for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E109 ; ++I82 ) { |
1159 | 82 | VNInfo *NewVN, *OldVN = I->valno; |
1160 | 82 | ValueInfoMap::iterator F = VM.find(OldVN); |
1161 | 82 | if (F == VM.end()82 ) { |
1162 | 81 | NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); |
1163 | 81 | VM.insert(std::make_pair(OldVN, NewVN)); |
1164 | 82 | } else { |
1165 | 1 | NewVN = F->second; |
1166 | 1 | } |
1167 | 82 | L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); |
1168 | 82 | } |
1169 | 109 | while (L2.begin() != L2.end()) |
1170 | 82 | L2.removeSegment(*L2.begin()); |
1171 | 27 | LIS->removeInterval(R2.Reg); |
1172 | 27 | |
1173 | 27 | updateKillFlags(R1.Reg); |
1174 | 27 | DEBUG(dbgs() << "coalesced: " << L1 << "\n"); |
1175 | 62 | L1.verify(); |
1176 | 62 | |
1177 | 62 | return true; |
1178 | 62 | } |
1179 | | |
1180 | | /// Attempt to coalesce one of the source registers to a MUX instruction with |
1181 | | /// the destination register. This could lead to having only one predicated |
1182 | | /// instruction in the end instead of two. |
1183 | | bool HexagonExpandCondsets::coalesceSegments( |
1184 | | const SmallVectorImpl<MachineInstr*> &Condsets, |
1185 | 860 | std::set<unsigned> &UpdRegs) { |
1186 | 860 | SmallVector<MachineInstr*,16> TwoRegs; |
1187 | 107 | for (MachineInstr *MI : Condsets) { |
1188 | 107 | MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); |
1189 | 107 | if (!S1.isReg() && 107 !S2.isReg()43 ) |
1190 | 39 | continue; |
1191 | 68 | TwoRegs.push_back(MI); |
1192 | 68 | } |
1193 | 860 | |
1194 | 860 | bool Changed = false; |
1195 | 68 | for (MachineInstr *CI : TwoRegs) { |
1196 | 68 | RegisterRef RD = CI->getOperand(0); |
1197 | 68 | RegisterRef RP = CI->getOperand(1); |
1198 | 68 | MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); |
1199 | 68 | bool Done = false; |
1200 | 68 | // Consider this case: |
1201 | 68 | // vreg1 = instr1 ... |
1202 | 68 | // vreg2 = instr2 ... |
1203 | 68 | // vreg0 = C2_mux ..., vreg1, vreg2 |
1204 | 68 | // If vreg0 was coalesced with vreg1, we could end up with the following |
1205 | 68 | // code: |
1206 | 68 | // vreg0 = instr1 ... |
1207 | 68 | // vreg2 = instr2 ... |
1208 | 68 | // vreg0 = A2_tfrf ..., vreg2 |
1209 | 68 | // which will later become: |
1210 | 68 | // vreg0 = instr1 ... |
1211 | 68 | // vreg0 = instr2_cNotPt ... |
1212 | 68 | // i.e. there will be an unconditional definition (instr1) of vreg0 |
1213 | 68 | // followed by a conditional one. The output dependency was there before |
1214 | 68 | // and it unavoidable, but if instr1 is predicable, we will no longer be |
1215 | 68 | // able to predicate it here. |
1216 | 68 | // To avoid this scenario, don't coalesce the destination register with |
1217 | 68 | // a source register that is defined by a predicable instruction. |
1218 | 68 | if (S1.isReg()68 ) { |
1219 | 64 | RegisterRef RS = S1; |
1220 | 64 | MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); |
1221 | 64 | if (!RDef || 64 !HII->isPredicable(*RDef)50 ) { |
1222 | 41 | Done = coalesceRegisters(RD, RegisterRef(S1)); |
1223 | 41 | if (Done41 ) { |
1224 | 20 | UpdRegs.insert(RD.Reg); |
1225 | 20 | UpdRegs.insert(S1.getReg()); |
1226 | 20 | } |
1227 | 41 | } |
1228 | 64 | } |
1229 | 68 | if (!Done && 68 S2.isReg()48 ) { |
1230 | 40 | RegisterRef RS = S2; |
1231 | 40 | MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); |
1232 | 40 | if (!RDef || 40 !HII->isPredicable(*RDef)31 ) { |
1233 | 21 | Done = coalesceRegisters(RD, RegisterRef(S2)); |
1234 | 21 | if (Done21 ) { |
1235 | 7 | UpdRegs.insert(RD.Reg); |
1236 | 7 | UpdRegs.insert(S2.getReg()); |
1237 | 7 | } |
1238 | 21 | } |
1239 | 40 | } |
1240 | 68 | Changed |= Done; |
1241 | 68 | } |
1242 | 860 | return Changed; |
1243 | 860 | } |
1244 | | |
1245 | 860 | bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { |
1246 | 860 | if (skipFunction(*MF.getFunction())) |
1247 | 0 | return false; |
1248 | 860 | |
1249 | 860 | HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); |
1250 | 860 | TRI = MF.getSubtarget().getRegisterInfo(); |
1251 | 860 | MDT = &getAnalysis<MachineDominatorTree>(); |
1252 | 860 | LIS = &getAnalysis<LiveIntervals>(); |
1253 | 860 | MRI = &MF.getRegInfo(); |
1254 | 860 | |
1255 | 860 | DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", |
1256 | 860 | MF.getFunction()->getParent())); |
1257 | 860 | |
1258 | 860 | bool Changed = false; |
1259 | 860 | std::set<unsigned> CoalUpd, PredUpd; |
1260 | 860 | |
1261 | 860 | SmallVector<MachineInstr*,16> Condsets; |
1262 | 860 | for (auto &B : MF) |
1263 | 2.00k | for (auto &I : B) |
1264 | 13.3k | if (13.3k isCondset(I)13.3k ) |
1265 | 107 | Condsets.push_back(&I); |
1266 | 860 | |
1267 | 860 | // Try to coalesce the target of a mux with one of its sources. |
1268 | 860 | // This could eliminate a register copy in some circumstances. |
1269 | 860 | Changed |= coalesceSegments(Condsets, CoalUpd); |
1270 | 860 | |
1271 | 860 | // Update kill flags on all source operands. This is done here because |
1272 | 860 | // at this moment (when expand-condsets runs), there are no kill flags |
1273 | 860 | // in the IR (they have been removed by live range analysis). |
1274 | 860 | // Updating them right before we split is the easiest, because splitting |
1275 | 860 | // adds definitions which would interfere with updating kills afterwards. |
1276 | 860 | std::set<unsigned> KillUpd; |
1277 | 860 | for (MachineInstr *MI : Condsets) |
1278 | 107 | for (MachineOperand &Op : MI->operands()) |
1279 | 428 | if (428 Op.isReg() && 428 Op.isUse()330 ) |
1280 | 223 | if (223 !CoalUpd.count(Op.getReg())223 ) |
1281 | 184 | KillUpd.insert(Op.getReg()); |
1282 | 860 | updateLiveness(KillUpd, false, true, false); |
1283 | 860 | DEBUG(LIS->print(dbgs() << "After coalescing\n", |
1284 | 860 | MF.getFunction()->getParent())); |
1285 | 860 | |
1286 | 860 | // First, simply split all muxes into a pair of conditional transfers |
1287 | 860 | // and update the live intervals to reflect the new arrangement. The |
1288 | 860 | // goal is to update the kill flags, since predication will rely on |
1289 | 860 | // them. |
1290 | 860 | for (MachineInstr *MI : Condsets) |
1291 | 107 | Changed |= split(*MI, PredUpd); |
1292 | 860 | Condsets.clear(); // The contents of Condsets are invalid here anyway. |
1293 | 860 | |
1294 | 860 | // Do not update live ranges after splitting. Recalculation of live |
1295 | 860 | // intervals removes kill flags, which were preserved by splitting on |
1296 | 860 | // the source operands of condsets. These kill flags are needed by |
1297 | 860 | // predication, and after splitting they are difficult to recalculate |
1298 | 860 | // (because of predicated defs), so make sure they are left untouched. |
1299 | 860 | // Predication does not use live intervals. |
1300 | 860 | DEBUG(LIS->print(dbgs() << "After splitting\n", |
1301 | 860 | MF.getFunction()->getParent())); |
1302 | 860 | |
1303 | 860 | // Traverse all blocks and collapse predicable instructions feeding |
1304 | 860 | // conditional transfers into predicated instructions. |
1305 | 860 | // Walk over all the instructions again, so we may catch pre-existing |
1306 | 860 | // cases that were not created in the previous step. |
1307 | 860 | for (auto &B : MF) |
1308 | 2.00k | Changed |= predicateInBlock(B, PredUpd); |
1309 | 860 | DEBUG(LIS->print(dbgs() << "After predicating\n", |
1310 | 860 | MF.getFunction()->getParent())); |
1311 | 860 | |
1312 | 860 | PredUpd.insert(CoalUpd.begin(), CoalUpd.end()); |
1313 | 860 | updateLiveness(PredUpd, true, true, true); |
1314 | 860 | |
1315 | 860 | DEBUG({ |
1316 | 860 | if (Changed) |
1317 | 860 | LIS->print(dbgs() << "After expand-condsets\n", |
1318 | 860 | MF.getFunction()->getParent()); |
1319 | 860 | }); |
1320 | 860 | |
1321 | 860 | return Changed; |
1322 | 860 | } |
1323 | | |
1324 | | //===----------------------------------------------------------------------===// |
1325 | | // Public Constructor Functions |
1326 | | //===----------------------------------------------------------------------===// |
1327 | | |
1328 | 0 | FunctionPass *llvm::createHexagonExpandCondsets() { |
1329 | 0 | return new HexagonExpandCondsets(); |
1330 | 0 | } |