/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/MLxExpansionPass.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of |
11 | | // multiply and add / sub instructions) when special VMLx hazards are detected. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "ARM.h" |
16 | | #include "ARMBaseInstrInfo.h" |
17 | | #include "ARMSubtarget.h" |
18 | | #include "llvm/ADT/SmallPtrSet.h" |
19 | | #include "llvm/ADT/Statistic.h" |
20 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
21 | | #include "llvm/CodeGen/MachineInstr.h" |
22 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
24 | | #include "llvm/Support/CommandLine.h" |
25 | | #include "llvm/Support/Debug.h" |
26 | | #include "llvm/Support/raw_ostream.h" |
27 | | #include "llvm/Target/TargetRegisterInfo.h" |
28 | | using namespace llvm; |
29 | | |
30 | | #define DEBUG_TYPE "mlx-expansion" |
31 | | |
32 | | static cl::opt<bool> |
33 | | ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); |
34 | | static cl::opt<unsigned> |
35 | | ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden); |
36 | | |
37 | | STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded"); |
38 | | |
39 | | namespace { |
40 | | struct MLxExpansion : public MachineFunctionPass { |
41 | | static char ID; |
42 | 4.12k | MLxExpansion() : MachineFunctionPass(ID) {} |
43 | | |
44 | | bool runOnMachineFunction(MachineFunction &Fn) override; |
45 | | |
46 | 4.12k | StringRef getPassName() const override { |
47 | 4.12k | return "ARM MLA / MLS expansion pass"; |
48 | 4.12k | } |
49 | | |
50 | | private: |
51 | | const ARMBaseInstrInfo *TII; |
52 | | const TargetRegisterInfo *TRI; |
53 | | MachineRegisterInfo *MRI; |
54 | | |
55 | | bool isLikeA9; |
56 | | bool isSwift; |
57 | | unsigned MIIdx; |
58 | | MachineInstr* LastMIs[4]; |
59 | | SmallPtrSet<MachineInstr*, 4> IgnoreStall; |
60 | | |
61 | | void clearStack(); |
62 | | void pushStack(MachineInstr *MI); |
63 | | MachineInstr *getAccDefMI(MachineInstr *MI) const; |
64 | | unsigned getDefReg(MachineInstr *MI) const; |
65 | | bool hasLoopHazard(MachineInstr *MI) const; |
66 | | bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; |
67 | | bool FindMLxHazard(MachineInstr *MI); |
68 | | void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, |
69 | | unsigned MulOpc, unsigned AddSubOpc, |
70 | | bool NegAcc, bool HasLane); |
71 | | bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); |
72 | | }; |
73 | | char MLxExpansion::ID = 0; |
74 | | } |
75 | | |
76 | 434 | void MLxExpansion::clearStack() { |
77 | 434 | std::fill(LastMIs, LastMIs + 4, nullptr); |
78 | 434 | MIIdx = 0; |
79 | 434 | } |
80 | | |
81 | 782 | void MLxExpansion::pushStack(MachineInstr *MI) { |
82 | 782 | LastMIs[MIIdx] = MI; |
83 | 782 | if (++MIIdx == 4) |
84 | 141 | MIIdx = 0; |
85 | 782 | } |
86 | | |
87 | 16 | MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { |
88 | 16 | // Look past COPY and INSERT_SUBREG instructions to find the |
89 | 16 | // real definition MI. This is important for _sfp instructions. |
90 | 16 | unsigned Reg = MI->getOperand(1).getReg(); |
91 | 16 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
92 | 0 | return nullptr; |
93 | 16 | |
94 | 16 | MachineBasicBlock *MBB = MI->getParent(); |
95 | 16 | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
96 | 16 | while (true16 ) { |
97 | 16 | if (DefMI->getParent() != MBB) |
98 | 0 | break; |
99 | 16 | if (16 DefMI->isCopyLike()16 ) { |
100 | 6 | Reg = DefMI->getOperand(1).getReg(); |
101 | 6 | if (TargetRegisterInfo::isVirtualRegister(Reg)6 ) { |
102 | 0 | DefMI = MRI->getVRegDef(Reg); |
103 | 0 | continue; |
104 | 0 | } |
105 | 10 | } else if (10 DefMI->isInsertSubreg()10 ) { |
106 | 0 | Reg = DefMI->getOperand(2).getReg(); |
107 | 0 | if (TargetRegisterInfo::isVirtualRegister(Reg)0 ) { |
108 | 0 | DefMI = MRI->getVRegDef(Reg); |
109 | 0 | continue; |
110 | 0 | } |
111 | 16 | } |
112 | 16 | break; |
113 | 16 | } |
114 | 16 | return DefMI; |
115 | 16 | } |
116 | | |
117 | 8 | unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { |
118 | 8 | unsigned Reg = MI->getOperand(0).getReg(); |
119 | 8 | if (TargetRegisterInfo::isPhysicalRegister(Reg) || |
120 | 8 | !MRI->hasOneNonDBGUse(Reg)) |
121 | 0 | return Reg; |
122 | 8 | |
123 | 8 | MachineBasicBlock *MBB = MI->getParent(); |
124 | 8 | MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg); |
125 | 8 | if (UseMI->getParent() != MBB) |
126 | 0 | return Reg; |
127 | 8 | |
128 | 8 | while (8 UseMI->isCopy() || 8 UseMI->isInsertSubreg()8 ) { |
129 | 0 | Reg = UseMI->getOperand(0).getReg(); |
130 | 0 | if (TargetRegisterInfo::isPhysicalRegister(Reg) || |
131 | 0 | !MRI->hasOneNonDBGUse(Reg)) |
132 | 0 | return Reg; |
133 | 0 | UseMI = &*MRI->use_instr_nodbg_begin(Reg); |
134 | 0 | if (UseMI->getParent() != MBB) |
135 | 0 | return Reg; |
136 | 0 | } |
137 | 8 | |
138 | 8 | return Reg; |
139 | 8 | } |
140 | | |
141 | | /// hasLoopHazard - Check whether an MLx instruction is chained to itself across |
142 | | /// a single-MBB loop. |
143 | 0 | bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { |
144 | 0 | unsigned Reg = MI->getOperand(1).getReg(); |
145 | 0 | if (TargetRegisterInfo::isPhysicalRegister(Reg)) |
146 | 0 | return false; |
147 | 0 |
|
148 | 0 | MachineBasicBlock *MBB = MI->getParent(); |
149 | 0 | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
150 | 0 | while (true0 ) { |
151 | 0 | outer_continue: |
152 | 0 | if (DefMI->getParent() != MBB) |
153 | 0 | break; |
154 | 0 |
|
155 | 0 | if (0 DefMI->isPHI()0 ) { |
156 | 0 | for (unsigned i = 1, e = DefMI->getNumOperands(); i < e0 ; i += 20 ) { |
157 | 0 | if (DefMI->getOperand(i + 1).getMBB() == MBB0 ) { |
158 | 0 | unsigned SrcReg = DefMI->getOperand(i).getReg(); |
159 | 0 | if (TargetRegisterInfo::isVirtualRegister(SrcReg)0 ) { |
160 | 0 | DefMI = MRI->getVRegDef(SrcReg); |
161 | 0 | goto outer_continue; |
162 | 0 | } |
163 | 0 | } |
164 | 0 | } |
165 | 0 | } else if (0 DefMI->isCopyLike()0 ) { |
166 | 0 | Reg = DefMI->getOperand(1).getReg(); |
167 | 0 | if (TargetRegisterInfo::isVirtualRegister(Reg)0 ) { |
168 | 0 | DefMI = MRI->getVRegDef(Reg); |
169 | 0 | continue; |
170 | 0 | } |
171 | 0 | } else if (0 DefMI->isInsertSubreg()0 ) { |
172 | 0 | Reg = DefMI->getOperand(2).getReg(); |
173 | 0 | if (TargetRegisterInfo::isVirtualRegister(Reg)0 ) { |
174 | 0 | DefMI = MRI->getVRegDef(Reg); |
175 | 0 | continue; |
176 | 0 | } |
177 | 0 | } |
178 | 0 |
|
179 | 0 | break; |
180 | 0 | } |
181 | 0 |
|
182 | 0 | return DefMI == MI; |
183 | 0 | } |
184 | | |
185 | 8 | bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { |
186 | 8 | // FIXME: Detect integer instructions properly. |
187 | 8 | const MCInstrDesc &MCID = MI->getDesc(); |
188 | 8 | unsigned Domain = MCID.TSFlags & ARMII::DomainMask; |
189 | 8 | if (MI->mayStore()) |
190 | 2 | return false; |
191 | 6 | unsigned Opcode = MCID.getOpcode(); |
192 | 6 | if (Opcode == ARM::VMOVRS || 6 Opcode == ARM::VMOVRRD4 ) |
193 | 4 | return false; |
194 | 2 | if (2 (Domain & ARMII::DomainVFP) || 2 (Domain & ARMII::DomainNEON)0 ) |
195 | 2 | return MI->readsRegister(Reg, TRI); |
196 | 0 | return false; |
197 | 0 | } |
198 | | |
199 | 0 | static bool isFpMulInstruction(unsigned Opcode) { |
200 | 0 | switch (Opcode) { |
201 | 0 | case ARM::VMULS: |
202 | 0 | case ARM::VMULfd: |
203 | 0 | case ARM::VMULfq: |
204 | 0 | case ARM::VMULD: |
205 | 0 | case ARM::VMULslfd: |
206 | 0 | case ARM::VMULslfq: |
207 | 0 | return true; |
208 | 0 | default: |
209 | 0 | return false; |
210 | 0 | } |
211 | 0 | } |
212 | | |
213 | 16 | bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { |
214 | 16 | if (NumExpand >= ExpandLimit) |
215 | 0 | return false; |
216 | 16 | |
217 | 16 | if (16 ForceExapnd16 ) |
218 | 0 | return true; |
219 | 16 | |
220 | 16 | MachineInstr *DefMI = getAccDefMI(MI); |
221 | 16 | if (TII->isFpMLxInstruction(DefMI->getOpcode())16 ) { |
222 | 2 | // r0 = vmla |
223 | 2 | // r3 = vmla r0, r1, r2 |
224 | 2 | // takes 16 - 17 cycles |
225 | 2 | // |
226 | 2 | // r0 = vmla |
227 | 2 | // r4 = vmul r1, r2 |
228 | 2 | // r3 = vadd r0, r4 |
229 | 2 | // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. |
230 | 2 | IgnoreStall.insert(DefMI); |
231 | 2 | return true; |
232 | 2 | } |
233 | 14 | |
234 | 14 | // On Swift, we mostly care about hazards from multiplication instructions |
235 | 14 | // writing the accumulator and the pipelining of loop iterations by out-of- |
236 | 14 | // order execution. |
237 | 14 | if (14 isSwift14 ) |
238 | 0 | return isFpMulInstruction(DefMI->getOpcode()) || 0 hasLoopHazard(MI)0 ; |
239 | 14 | |
240 | 14 | if (14 IgnoreStall.count(MI)14 ) |
241 | 2 | return false; |
242 | 12 | |
243 | 12 | // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the |
244 | 12 | // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall |
245 | 12 | // preserves the in-order retirement of the instructions. |
246 | 12 | // Look at the next few instructions, if *most* of them can cause hazards, |
247 | 12 | // then the scheduler can't *fix* this, we'd better break up the VMLA. |
248 | 12 | unsigned Limit1 = isLikeA9 ? 12 112 : 40 ; |
249 | 12 | unsigned Limit2 = isLikeA9 ? 112 : 40 ; |
250 | 60 | for (unsigned i = 1; i <= 460 ; ++i48 ) { |
251 | 48 | int Idx = ((int)MIIdx - i + 4) % 4; |
252 | 48 | MachineInstr *NextMI = LastMIs[Idx]; |
253 | 48 | if (!NextMI) |
254 | 30 | continue; |
255 | 18 | |
256 | 18 | if (18 TII->canCauseFpMLxStall(NextMI->getOpcode())18 ) { |
257 | 0 | if (i <= Limit1) |
258 | 0 | return true; |
259 | 18 | } |
260 | 18 | |
261 | 18 | // Look for VMLx RAW hazard. |
262 | 18 | if (18 i <= Limit2 && 18 hasRAWHazard(getDefReg(MI), NextMI)8 ) |
263 | 0 | return true; |
264 | 48 | } |
265 | 12 | |
266 | 12 | return false; |
267 | 16 | } |
268 | | |
269 | | /// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair |
270 | | /// of MUL + ADD / SUB instructions. |
271 | | void |
272 | | MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, |
273 | | unsigned MulOpc, unsigned AddSubOpc, |
274 | 2 | bool NegAcc, bool HasLane) { |
275 | 2 | unsigned DstReg = MI->getOperand(0).getReg(); |
276 | 2 | bool DstDead = MI->getOperand(0).isDead(); |
277 | 2 | unsigned AccReg = MI->getOperand(1).getReg(); |
278 | 2 | unsigned Src1Reg = MI->getOperand(2).getReg(); |
279 | 2 | unsigned Src2Reg = MI->getOperand(3).getReg(); |
280 | 2 | bool Src1Kill = MI->getOperand(2).isKill(); |
281 | 2 | bool Src2Kill = MI->getOperand(3).isKill(); |
282 | 2 | unsigned LaneImm = HasLane ? MI->getOperand(4).getImm()0 : 02 ; |
283 | 2 | unsigned NextOp = HasLane ? 50 : 42 ; |
284 | 2 | ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); |
285 | 2 | unsigned PredReg = MI->getOperand(++NextOp).getReg(); |
286 | 2 | |
287 | 2 | const MCInstrDesc &MCID1 = TII->get(MulOpc); |
288 | 2 | const MCInstrDesc &MCID2 = TII->get(AddSubOpc); |
289 | 2 | const MachineFunction &MF = *MI->getParent()->getParent(); |
290 | 2 | unsigned TmpReg = MRI->createVirtualRegister( |
291 | 2 | TII->getRegClass(MCID1, 0, TRI, MF)); |
292 | 2 | |
293 | 2 | MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) |
294 | 2 | .addReg(Src1Reg, getKillRegState(Src1Kill)) |
295 | 2 | .addReg(Src2Reg, getKillRegState(Src2Kill)); |
296 | 2 | if (HasLane) |
297 | 0 | MIB.addImm(LaneImm); |
298 | 2 | MIB.addImm(Pred).addReg(PredReg); |
299 | 2 | |
300 | 2 | MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) |
301 | 2 | .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); |
302 | 2 | |
303 | 2 | if (NegAcc2 ) { |
304 | 0 | bool AccKill = MRI->hasOneNonDBGUse(AccReg); |
305 | 0 | MIB.addReg(TmpReg, getKillRegState(true)) |
306 | 0 | .addReg(AccReg, getKillRegState(AccKill)); |
307 | 2 | } else { |
308 | 2 | MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true)); |
309 | 2 | } |
310 | 2 | MIB.addImm(Pred).addReg(PredReg); |
311 | 2 | |
312 | 2 | DEBUG({ |
313 | 2 | dbgs() << "Expanding: " << *MI; |
314 | 2 | dbgs() << " to:\n"; |
315 | 2 | MachineBasicBlock::iterator MII = MI; |
316 | 2 | MII = std::prev(MII); |
317 | 2 | MachineInstr &MI2 = *MII; |
318 | 2 | MII = std::prev(MII); |
319 | 2 | MachineInstr &MI1 = *MII; |
320 | 2 | dbgs() << " " << MI1; |
321 | 2 | dbgs() << " " << MI2; |
322 | 2 | }); |
323 | 2 | |
324 | 2 | MI->eraseFromParent(); |
325 | 2 | ++NumExpand; |
326 | 2 | } |
327 | | |
328 | 239 | bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { |
329 | 239 | bool Changed = false; |
330 | 239 | |
331 | 239 | clearStack(); |
332 | 239 | IgnoreStall.clear(); |
333 | 239 | |
334 | 239 | unsigned Skip = 0; |
335 | 239 | MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); |
336 | 2.69k | while (MII != E2.69k ) { |
337 | 2.45k | MachineInstr *MI = &*MII++; |
338 | 2.45k | |
339 | 2.45k | if (MI->isPosition() || 2.45k MI->isImplicitDef()2.45k || MI->isCopy()2.35k ) |
340 | 799 | continue; |
341 | 1.65k | |
342 | 1.65k | const MCInstrDesc &MCID = MI->getDesc(); |
343 | 1.65k | if (MI->isBarrier()1.65k ) { |
344 | 195 | clearStack(); |
345 | 195 | Skip = 0; |
346 | 195 | continue; |
347 | 195 | } |
348 | 1.46k | |
349 | 1.46k | unsigned Domain = MCID.TSFlags & ARMII::DomainMask; |
350 | 1.46k | if (Domain == ARMII::DomainGeneral1.46k ) { |
351 | 858 | if (++Skip == 2) |
352 | 858 | // Assume dual issues of non-VFP / NEON instructions. |
353 | 180 | pushStack(nullptr); |
354 | 1.46k | } else { |
355 | 604 | Skip = 0; |
356 | 604 | |
357 | 604 | unsigned MulOpc, AddSubOpc; |
358 | 604 | bool NegAcc, HasLane; |
359 | 604 | if (!TII->isFpMLxInstruction(MCID.getOpcode(), |
360 | 604 | MulOpc, AddSubOpc, NegAcc, HasLane) || |
361 | 16 | !FindMLxHazard(MI)) |
362 | 602 | pushStack(MI); |
363 | 2 | else { |
364 | 2 | ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); |
365 | 2 | Changed = true; |
366 | 2 | } |
367 | 604 | } |
368 | 2.45k | } |
369 | 239 | |
370 | 239 | return Changed; |
371 | 239 | } |
372 | | |
373 | 15.8k | bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { |
374 | 15.8k | if (skipFunction(*Fn.getFunction())) |
375 | 8 | return false; |
376 | 15.8k | |
377 | 15.8k | TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); |
378 | 15.8k | TRI = Fn.getSubtarget().getRegisterInfo(); |
379 | 15.8k | MRI = &Fn.getRegInfo(); |
380 | 15.8k | const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>(); |
381 | 15.8k | if (!STI->expandMLx()) |
382 | 15.7k | return false; |
383 | 152 | isLikeA9 = STI->isLikeA9() || 152 STI->isSwift()0 ; |
384 | 152 | isSwift = STI->isSwift(); |
385 | 152 | |
386 | 152 | bool Modified = false; |
387 | 152 | for (MachineBasicBlock &MBB : Fn) |
388 | 239 | Modified |= ExpandFPMLxInstructions(MBB); |
389 | 15.8k | |
390 | 15.8k | return Modified; |
391 | 15.8k | } |
392 | | |
393 | 4.12k | FunctionPass *llvm::createMLxExpansionPass() { |
394 | 4.12k | return new MLxExpansion(); |
395 | 4.12k | } |