/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file contains the PowerPC implementation of the TargetInstrInfo class. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "PPCInstrInfo.h" |
15 | | #include "MCTargetDesc/PPCPredicates.h" |
16 | | #include "PPC.h" |
17 | | #include "PPCHazardRecognizers.h" |
18 | | #include "PPCInstrBuilder.h" |
19 | | #include "PPCMachineFunctionInfo.h" |
20 | | #include "PPCTargetMachine.h" |
21 | | #include "llvm/ADT/STLExtras.h" |
22 | | #include "llvm/ADT/Statistic.h" |
23 | | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
24 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
26 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | | #include "llvm/CodeGen/MachineMemOperand.h" |
28 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
29 | | #include "llvm/CodeGen/PseudoSourceValue.h" |
30 | | #include "llvm/CodeGen/ScheduleDAG.h" |
31 | | #include "llvm/CodeGen/SlotIndexes.h" |
32 | | #include "llvm/CodeGen/StackMaps.h" |
33 | | #include "llvm/MC/MCAsmInfo.h" |
34 | | #include "llvm/MC/MCInst.h" |
35 | | #include "llvm/Support/CommandLine.h" |
36 | | #include "llvm/Support/Debug.h" |
37 | | #include "llvm/Support/ErrorHandling.h" |
38 | | #include "llvm/Support/TargetRegistry.h" |
39 | | #include "llvm/Support/raw_ostream.h" |
40 | | |
41 | | using namespace llvm; |
42 | | |
43 | | #define DEBUG_TYPE "ppc-instr-info" |
44 | | |
45 | | #define GET_INSTRMAP_INFO |
46 | | #define GET_INSTRINFO_CTOR_DTOR |
47 | | #include "PPCGenInstrInfo.inc" |
48 | | |
49 | | STATISTIC(NumStoreSPILLVSRRCAsVec, |
50 | | "Number of spillvsrrc spilled to stack as vec"); |
51 | | STATISTIC(NumStoreSPILLVSRRCAsGpr, |
52 | | "Number of spillvsrrc spilled to stack as gpr"); |
53 | | STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc"); |
54 | | |
55 | | static cl:: |
56 | | opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, |
57 | | cl::desc("Disable analysis for CTR loops")); |
58 | | |
59 | | static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", |
60 | | cl::desc("Disable compare instruction optimization"), cl::Hidden); |
61 | | |
62 | | static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", |
63 | | cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), |
64 | | cl::Hidden); |
65 | | |
66 | | static cl::opt<bool> |
67 | | UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, |
68 | | cl::desc("Use the old (incorrect) instruction latency calculation")); |
69 | | |
70 | | // Pin the vtable to this file. |
71 | 0 | void PPCInstrInfo::anchor() {} |
72 | | |
73 | | PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) |
74 | | : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP, |
75 | | /* CatchRetOpcode */ -1, |
76 | | STI.isPPC64() ? PPC::BLR8 : PPC::BLR), |
77 | 1.40k | Subtarget(STI), RI(STI.getTargetMachine()) {} |
78 | | |
79 | | /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for |
80 | | /// this target when scheduling the DAG. |
81 | | ScheduleHazardRecognizer * |
82 | | PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
83 | 2.34k | const ScheduleDAG *DAG) const { |
84 | 2.34k | unsigned Directive = |
85 | 2.34k | static_cast<const PPCSubtarget *>(STI)->getDarwinDirective(); |
86 | 2.34k | if (Directive == PPC::DIR_440 || 2.34k Directive == PPC::DIR_A22.34k || |
87 | 2.34k | Directive == PPC::DIR_E500mc2.34k || Directive == PPC::DIR_E55002.34k ) { |
88 | 0 | const InstrItineraryData *II = |
89 | 0 | static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); |
90 | 0 | return new ScoreboardHazardRecognizer(II, DAG); |
91 | 0 | } |
92 | 2.34k | |
93 | 2.34k | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
94 | 2.34k | } |
95 | | |
96 | | /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer |
97 | | /// to use for this target when scheduling the DAG. |
98 | | ScheduleHazardRecognizer * |
99 | | PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
100 | 6.66k | const ScheduleDAG *DAG) const { |
101 | 6.66k | unsigned Directive = |
102 | 6.66k | DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); |
103 | 6.66k | |
104 | 6.66k | // FIXME: Leaving this as-is until we have POWER9 scheduling info |
105 | 6.66k | if (Directive == PPC::DIR_PWR7 || 6.66k Directive == PPC::DIR_PWR85.71k ) |
106 | 3.93k | return new PPCDispatchGroupSBHazardRecognizer(II, DAG); |
107 | 2.73k | |
108 | 2.73k | // Most subtargets use a PPC970 recognizer. |
109 | 2.73k | if (2.73k Directive != PPC::DIR_440 && 2.73k Directive != PPC::DIR_A22.72k && |
110 | 2.73k | Directive != PPC::DIR_E500mc2.45k && Directive != PPC::DIR_E55002.44k ) { |
111 | 2.44k | assert(DAG->TII && "No InstrInfo?"); |
112 | 2.44k | |
113 | 2.44k | return new PPCHazardRecognizer970(*DAG); |
114 | 2.44k | } |
115 | 288 | |
116 | 288 | return new ScoreboardHazardRecognizer(II, DAG); |
117 | 288 | } |
118 | | |
119 | | unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
120 | | const MachineInstr &MI, |
121 | 151k | unsigned *PredCost) const { |
122 | 151k | if (!ItinData || 151k UseOldLatencyCalc151k ) |
123 | 0 | return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); |
124 | 151k | |
125 | 151k | // The default implementation of getInstrLatency calls getStageLatency, but |
126 | 151k | // getStageLatency does not do the right thing for us. While we have |
127 | 151k | // itinerary, most cores are fully pipelined, and so the itineraries only |
128 | 151k | // express the first part of the pipeline, not every stage. Instead, we need |
129 | 151k | // to use the listed output operand cycle number (using operand 0 here, which |
130 | 151k | // is an output). |
131 | 151k | |
132 | 151k | unsigned Latency = 1; |
133 | 151k | unsigned DefClass = MI.getDesc().getSchedClass(); |
134 | 624k | for (unsigned i = 0, e = MI.getNumOperands(); i != e624k ; ++i472k ) { |
135 | 472k | const MachineOperand &MO = MI.getOperand(i); |
136 | 472k | if (!MO.isReg() || 472k !MO.isDef()386k || MO.isImplicit()151k ) |
137 | 340k | continue; |
138 | 131k | |
139 | 131k | int Cycle = ItinData->getOperandCycle(DefClass, i); |
140 | 131k | if (Cycle < 0) |
141 | 75.7k | continue; |
142 | 56.0k | |
143 | 56.0k | Latency = std::max(Latency, (unsigned) Cycle); |
144 | 56.0k | } |
145 | 151k | |
146 | 151k | return Latency; |
147 | 151k | } |
148 | | |
149 | | int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
150 | | const MachineInstr &DefMI, unsigned DefIdx, |
151 | | const MachineInstr &UseMI, |
152 | 72.7k | unsigned UseIdx) const { |
153 | 72.7k | int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, |
154 | 72.7k | UseMI, UseIdx); |
155 | 72.7k | |
156 | 72.7k | if (!DefMI.getParent()) |
157 | 88 | return Latency; |
158 | 72.7k | |
159 | 72.7k | const MachineOperand &DefMO = DefMI.getOperand(DefIdx); |
160 | 72.7k | unsigned Reg = DefMO.getReg(); |
161 | 72.7k | |
162 | 72.7k | bool IsRegCR; |
163 | 72.7k | if (TargetRegisterInfo::isVirtualRegister(Reg)72.7k ) { |
164 | 27.4k | const MachineRegisterInfo *MRI = |
165 | 27.4k | &DefMI.getParent()->getParent()->getRegInfo(); |
166 | 27.4k | IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || |
167 | 26.1k | MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); |
168 | 72.7k | } else { |
169 | 45.2k | IsRegCR = PPC::CRRCRegClass.contains(Reg) || |
170 | 37.2k | PPC::CRBITRCRegClass.contains(Reg); |
171 | 45.2k | } |
172 | 72.7k | |
173 | 72.7k | if (UseMI.isBranch() && 72.7k IsRegCR618 ) { |
174 | 618 | if (Latency < 0) |
175 | 155 | Latency = getInstrLatency(ItinData, DefMI); |
176 | 618 | |
177 | 618 | // On some cores, there is an additional delay between writing to a condition |
178 | 618 | // register, and using it from a branch. |
179 | 618 | unsigned Directive = Subtarget.getDarwinDirective(); |
180 | 618 | switch (Directive) { |
181 | 107 | default: break; |
182 | 511 | case PPC::DIR_7400: |
183 | 511 | case PPC::DIR_750: |
184 | 511 | case PPC::DIR_970: |
185 | 511 | case PPC::DIR_E5500: |
186 | 511 | case PPC::DIR_PWR4: |
187 | 511 | case PPC::DIR_PWR5: |
188 | 511 | case PPC::DIR_PWR5X: |
189 | 511 | case PPC::DIR_PWR6: |
190 | 511 | case PPC::DIR_PWR6X: |
191 | 511 | case PPC::DIR_PWR7: |
192 | 511 | case PPC::DIR_PWR8: |
193 | 511 | // FIXME: Is this needed for POWER9? |
194 | 511 | Latency += 2; |
195 | 511 | break; |
196 | 72.7k | } |
197 | 72.7k | } |
198 | 72.7k | |
199 | 72.7k | return Latency; |
200 | 72.7k | } |
201 | | |
202 | | // This function does not list all associative and commutative operations, but |
203 | | // only those worth feeding through the machine combiner in an attempt to |
204 | | // reduce the critical path. Mostly, this means floating-point operations, |
205 | | // because they have high latencies (compared to other operations, such and |
206 | | // and/or, which are also associative and commutative, but have low latencies). |
207 | 311 | bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { |
208 | 311 | switch (Inst.getOpcode()) { |
209 | 311 | // FP Add: |
210 | 80 | case PPC::FADD: |
211 | 80 | case PPC::FADDS: |
212 | 80 | // FP Multiply: |
213 | 80 | case PPC::FMUL: |
214 | 80 | case PPC::FMULS: |
215 | 80 | // Altivec Add: |
216 | 80 | case PPC::VADDFP: |
217 | 80 | // VSX Add: |
218 | 80 | case PPC::XSADDDP: |
219 | 80 | case PPC::XVADDDP: |
220 | 80 | case PPC::XVADDSP: |
221 | 80 | case PPC::XSADDSP: |
222 | 80 | // VSX Multiply: |
223 | 80 | case PPC::XSMULDP: |
224 | 80 | case PPC::XVMULDP: |
225 | 80 | case PPC::XVMULSP: |
226 | 80 | case PPC::XSMULSP: |
227 | 80 | // QPX Add: |
228 | 80 | case PPC::QVFADD: |
229 | 80 | case PPC::QVFADDS: |
230 | 80 | case PPC::QVFADDSs: |
231 | 80 | // QPX Multiply: |
232 | 80 | case PPC::QVFMUL: |
233 | 80 | case PPC::QVFMULS: |
234 | 80 | case PPC::QVFMULSs: |
235 | 80 | return true; |
236 | 231 | default: |
237 | 231 | return false; |
238 | 0 | } |
239 | 0 | } |
240 | | |
241 | | bool PPCInstrInfo::getMachineCombinerPatterns( |
242 | | MachineInstr &Root, |
243 | 81.7k | SmallVectorImpl<MachineCombinerPattern> &Patterns) const { |
244 | 81.7k | // Using the machine combiner in this way is potentially expensive, so |
245 | 81.7k | // restrict to when aggressive optimizations are desired. |
246 | 81.7k | if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) |
247 | 80.2k | return false; |
248 | 1.47k | |
249 | 1.47k | // FP reassociation is only legal when we don't need strict IEEE semantics. |
250 | 1.47k | if (1.47k !Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath1.47k ) |
251 | 1.16k | return false; |
252 | 311 | |
253 | 311 | return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); |
254 | 311 | } |
255 | | |
256 | | // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. |
257 | | bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, |
258 | | unsigned &SrcReg, unsigned &DstReg, |
259 | 77.0k | unsigned &SubIdx) const { |
260 | 77.0k | switch (MI.getOpcode()) { |
261 | 76.8k | default: return false; |
262 | 185 | case PPC::EXTSW: |
263 | 185 | case PPC::EXTSW_32_64: |
264 | 185 | SrcReg = MI.getOperand(1).getReg(); |
265 | 185 | DstReg = MI.getOperand(0).getReg(); |
266 | 185 | SubIdx = PPC::sub_32; |
267 | 185 | return true; |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | | unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
272 | 9.69k | int &FrameIndex) const { |
273 | 9.69k | // Note: This list must be kept consistent with LoadRegFromStackSlot. |
274 | 9.69k | switch (MI.getOpcode()) { |
275 | 8.97k | default: break; |
276 | 725 | case PPC::LD: |
277 | 725 | case PPC::LWZ: |
278 | 725 | case PPC::LFS: |
279 | 725 | case PPC::LFD: |
280 | 725 | case PPC::RESTORE_CR: |
281 | 725 | case PPC::RESTORE_CRBIT: |
282 | 725 | case PPC::LVX: |
283 | 725 | case PPC::LXVD2X: |
284 | 725 | case PPC::LXVX: |
285 | 725 | case PPC::QVLFDX: |
286 | 725 | case PPC::QVLFSXs: |
287 | 725 | case PPC::QVLFDXb: |
288 | 725 | case PPC::RESTORE_VRSAVE: |
289 | 725 | case PPC::SPILLTOVSR_LD: |
290 | 725 | // Check for the operands added by addFrameReference (the immediate is the |
291 | 725 | // offset which defaults to 0). |
292 | 725 | if (MI.getOperand(1).isImm() && 725 !MI.getOperand(1).getImm()456 && |
293 | 725 | MI.getOperand(2).isFI()264 ) { |
294 | 202 | FrameIndex = MI.getOperand(2).getIndex(); |
295 | 202 | return MI.getOperand(0).getReg(); |
296 | 202 | } |
297 | 523 | break; |
298 | 9.49k | } |
299 | 9.49k | return 0; |
300 | 9.49k | } |
301 | | |
302 | | // For opcodes with the ReMaterializable flag set, this function is called to |
303 | | // verify the instruction is really rematable. |
304 | | bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, |
305 | 3.93k | AliasAnalysis *AA) const { |
306 | 3.93k | switch (MI.getOpcode()) { |
307 | 0 | default: |
308 | 0 | // This function should only be called for opcodes with the ReMaterializable |
309 | 0 | // flag set. |
310 | 0 | llvm_unreachable("Unknown rematerializable operation!"); |
311 | 0 | break; |
312 | 3.93k | case PPC::LI: |
313 | 3.93k | case PPC::LI8: |
314 | 3.93k | case PPC::LIS: |
315 | 3.93k | case PPC::LIS8: |
316 | 3.93k | case PPC::QVGPCI: |
317 | 3.93k | case PPC::ADDIStocHA: |
318 | 3.93k | case PPC::ADDItocL: |
319 | 3.93k | case PPC::LOAD_STACK_GUARD: |
320 | 3.93k | return true; |
321 | 0 | } |
322 | 0 | return false; |
323 | 0 | } |
324 | | |
325 | | unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
326 | 7.85k | int &FrameIndex) const { |
327 | 7.85k | // Note: This list must be kept consistent with StoreRegToStackSlot. |
328 | 7.85k | switch (MI.getOpcode()) { |
329 | 7.56k | default: break; |
330 | 289 | case PPC::STD: |
331 | 289 | case PPC::STW: |
332 | 289 | case PPC::STFS: |
333 | 289 | case PPC::STFD: |
334 | 289 | case PPC::SPILL_CR: |
335 | 289 | case PPC::SPILL_CRBIT: |
336 | 289 | case PPC::STVX: |
337 | 289 | case PPC::STXVD2X: |
338 | 289 | case PPC::STXVX: |
339 | 289 | case PPC::QVSTFDX: |
340 | 289 | case PPC::QVSTFSXs: |
341 | 289 | case PPC::QVSTFDXb: |
342 | 289 | case PPC::SPILL_VRSAVE: |
343 | 289 | case PPC::SPILLTOVSR_ST: |
344 | 289 | // Check for the operands added by addFrameReference (the immediate is the |
345 | 289 | // offset which defaults to 0). |
346 | 289 | if (MI.getOperand(1).isImm() && 289 !MI.getOperand(1).getImm()273 && |
347 | 289 | MI.getOperand(2).isFI()129 ) { |
348 | 59 | FrameIndex = MI.getOperand(2).getIndex(); |
349 | 59 | return MI.getOperand(0).getReg(); |
350 | 59 | } |
351 | 230 | break; |
352 | 7.79k | } |
353 | 7.79k | return 0; |
354 | 7.79k | } |
355 | | |
356 | | MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
357 | | unsigned OpIdx1, |
358 | 5.15k | unsigned OpIdx2) const { |
359 | 5.15k | MachineFunction &MF = *MI.getParent()->getParent(); |
360 | 5.15k | |
361 | 5.15k | // Normal instructions can be commuted the obvious way. |
362 | 5.15k | if (MI.getOpcode() != PPC::RLWIMI && 5.15k MI.getOpcode() != PPC::RLWIMIo5.04k ) |
363 | 5.04k | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
364 | 106 | // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a |
365 | 106 | // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because |
366 | 106 | // changing the relative order of the mask operands might change what happens |
367 | 106 | // to the high-bits of the mask (and, thus, the result). |
368 | 106 | |
369 | 106 | // Cannot commute if it has a non-zero rotate count. |
370 | 106 | if (106 MI.getOperand(3).getImm() != 0106 ) |
371 | 59 | return nullptr; |
372 | 47 | |
373 | 47 | // If we have a zero rotate count, we have: |
374 | 47 | // M = mask(MB,ME) |
375 | 47 | // Op0 = (Op1 & ~M) | (Op2 & M) |
376 | 47 | // Change this to: |
377 | 47 | // M = mask((ME+1)&31, (MB-1)&31) |
378 | 47 | // Op0 = (Op2 & ~M) | (Op1 & M) |
379 | 47 | |
380 | 47 | // Swap op1/op2 |
381 | 106 | assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && |
382 | 47 | "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo."); |
383 | 47 | unsigned Reg0 = MI.getOperand(0).getReg(); |
384 | 47 | unsigned Reg1 = MI.getOperand(1).getReg(); |
385 | 47 | unsigned Reg2 = MI.getOperand(2).getReg(); |
386 | 47 | unsigned SubReg1 = MI.getOperand(1).getSubReg(); |
387 | 47 | unsigned SubReg2 = MI.getOperand(2).getSubReg(); |
388 | 47 | bool Reg1IsKill = MI.getOperand(1).isKill(); |
389 | 47 | bool Reg2IsKill = MI.getOperand(2).isKill(); |
390 | 47 | bool ChangeReg0 = false; |
391 | 47 | // If machine instrs are no longer in two-address forms, update |
392 | 47 | // destination register as well. |
393 | 47 | if (Reg0 == Reg147 ) { |
394 | 1 | // Must be two address instruction! |
395 | 1 | assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && |
396 | 1 | "Expecting a two-address instruction!"); |
397 | 1 | assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); |
398 | 1 | Reg2IsKill = false; |
399 | 1 | ChangeReg0 = true; |
400 | 1 | } |
401 | 47 | |
402 | 47 | // Masks. |
403 | 47 | unsigned MB = MI.getOperand(4).getImm(); |
404 | 47 | unsigned ME = MI.getOperand(5).getImm(); |
405 | 47 | |
406 | 47 | // We can't commute a trivial mask (there is no way to represent an all-zero |
407 | 47 | // mask). |
408 | 47 | if (MB == 0 && 47 ME == 3119 ) |
409 | 3 | return nullptr; |
410 | 44 | |
411 | 44 | if (44 NewMI44 ) { |
412 | 0 | // Create a new instruction. |
413 | 0 | unsigned Reg0 = ChangeReg0 ? Reg20 : MI.getOperand(0).getReg()0 ; |
414 | 0 | bool Reg0IsDead = MI.getOperand(0).isDead(); |
415 | 0 | return BuildMI(MF, MI.getDebugLoc(), MI.getDesc()) |
416 | 0 | .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) |
417 | 0 | .addReg(Reg2, getKillRegState(Reg2IsKill)) |
418 | 0 | .addReg(Reg1, getKillRegState(Reg1IsKill)) |
419 | 0 | .addImm((ME + 1) & 31) |
420 | 0 | .addImm((MB - 1) & 31); |
421 | 0 | } |
422 | 44 | |
423 | 44 | if (44 ChangeReg044 ) { |
424 | 1 | MI.getOperand(0).setReg(Reg2); |
425 | 1 | MI.getOperand(0).setSubReg(SubReg2); |
426 | 1 | } |
427 | 5.15k | MI.getOperand(2).setReg(Reg1); |
428 | 5.15k | MI.getOperand(1).setReg(Reg2); |
429 | 5.15k | MI.getOperand(2).setSubReg(SubReg1); |
430 | 5.15k | MI.getOperand(1).setSubReg(SubReg2); |
431 | 5.15k | MI.getOperand(2).setIsKill(Reg1IsKill); |
432 | 5.15k | MI.getOperand(1).setIsKill(Reg2IsKill); |
433 | 5.15k | |
434 | 5.15k | // Swap the mask around. |
435 | 5.15k | MI.getOperand(4).setImm((ME + 1) & 31); |
436 | 5.15k | MI.getOperand(5).setImm((MB - 1) & 31); |
437 | 5.15k | return &MI; |
438 | 5.15k | } |
439 | | |
440 | | bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, |
441 | 5.60k | unsigned &SrcOpIdx2) const { |
442 | 5.60k | // For VSX A-Type FMA instructions, it is the first two operands that can be |
443 | 5.60k | // commuted, however, because the non-encoded tied input operand is listed |
444 | 5.60k | // first, the operands to swap are actually the second and third. |
445 | 5.60k | |
446 | 5.60k | int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode()); |
447 | 5.60k | if (AltOpc == -1) |
448 | 4.83k | return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); |
449 | 774 | |
450 | 774 | // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1 |
451 | 774 | // and SrcOpIdx2. |
452 | 774 | return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); |
453 | 774 | } |
454 | | |
455 | | void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, |
456 | 431 | MachineBasicBlock::iterator MI) const { |
457 | 431 | // This function is used for scheduling, and the nop wanted here is the type |
458 | 431 | // that terminates dispatch groups on the POWER cores. |
459 | 431 | unsigned Directive = Subtarget.getDarwinDirective(); |
460 | 431 | unsigned Opcode; |
461 | 431 | switch (Directive) { |
462 | 116 | default: Opcode = PPC::NOP; break; |
463 | 8 | case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; |
464 | 184 | case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; |
465 | 104 | case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ |
466 | 431 | // FIXME: Update when POWER9 scheduling model is ready. |
467 | 19 | case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break; |
468 | 431 | } |
469 | 431 | |
470 | 431 | DebugLoc DL; |
471 | 431 | BuildMI(MBB, MI, DL, get(Opcode)); |
472 | 431 | } |
473 | | |
474 | | /// Return the noop instruction to use for a noop. |
475 | 5 | void PPCInstrInfo::getNoop(MCInst &NopInst) const { |
476 | 5 | NopInst.setOpcode(PPC::NOP); |
477 | 5 | } |
478 | | |
479 | | // Branch analysis. |
480 | | // Note: If the condition register is set to CTR or CTR8 then this is a |
481 | | // BDNZ (imm == 1) or BDZ (imm == 0) branch. |
482 | | bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
483 | | MachineBasicBlock *&TBB, |
484 | | MachineBasicBlock *&FBB, |
485 | | SmallVectorImpl<MachineOperand> &Cond, |
486 | 366k | bool AllowModify) const { |
487 | 366k | bool isPPC64 = Subtarget.isPPC64(); |
488 | 366k | |
489 | 366k | // If the block has no terminators, it just falls into the block after it. |
490 | 366k | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
491 | 366k | if (I == MBB.end()) |
492 | 11.6k | return false; |
493 | 354k | |
494 | 354k | if (354k !isUnpredicatedTerminator(*I)354k ) |
495 | 50.2k | return false; |
496 | 304k | |
497 | 304k | if (304k AllowModify304k ) { |
498 | 58.8k | // If the BB ends with an unconditional branch to the fallthrough BB, |
499 | 58.8k | // we eliminate the branch instruction. |
500 | 58.8k | if (I->getOpcode() == PPC::B && |
501 | 58.8k | MBB.isLayoutSuccessor(I->getOperand(0).getMBB())6.98k ) { |
502 | 1.08k | I->eraseFromParent(); |
503 | 1.08k | |
504 | 1.08k | // We update iterator after deleting the last branch. |
505 | 1.08k | I = MBB.getLastNonDebugInstr(); |
506 | 1.08k | if (I == MBB.end() || 1.08k !isUnpredicatedTerminator(*I)1.08k ) |
507 | 118 | return false; |
508 | 304k | } |
509 | 58.8k | } |
510 | 304k | |
511 | 304k | // Get the last instruction in the block. |
512 | 304k | MachineInstr &LastInst = *I; |
513 | 304k | |
514 | 304k | // If there is only one terminator instruction, process it. |
515 | 304k | if (I == MBB.begin() || 304k !isUnpredicatedTerminator(*--I)275k ) { |
516 | 281k | if (LastInst.getOpcode() == PPC::B281k ) { |
517 | 13.5k | if (!LastInst.getOperand(0).isMBB()) |
518 | 0 | return true; |
519 | 13.5k | TBB = LastInst.getOperand(0).getMBB(); |
520 | 13.5k | return false; |
521 | 267k | } else if (267k LastInst.getOpcode() == PPC::BCC267k ) { |
522 | 39.6k | if (!LastInst.getOperand(2).isMBB()) |
523 | 0 | return true; |
524 | 39.6k | // Block ends with fall-through condbranch. |
525 | 39.6k | TBB = LastInst.getOperand(2).getMBB(); |
526 | 39.6k | Cond.push_back(LastInst.getOperand(0)); |
527 | 39.6k | Cond.push_back(LastInst.getOperand(1)); |
528 | 39.6k | return false; |
529 | 228k | } else if (228k LastInst.getOpcode() == PPC::BC228k ) { |
530 | 12.0k | if (!LastInst.getOperand(1).isMBB()) |
531 | 0 | return true; |
532 | 12.0k | // Block ends with fall-through condbranch. |
533 | 12.0k | TBB = LastInst.getOperand(1).getMBB(); |
534 | 12.0k | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); |
535 | 12.0k | Cond.push_back(LastInst.getOperand(0)); |
536 | 12.0k | return false; |
537 | 216k | } else if (216k LastInst.getOpcode() == PPC::BCn216k ) { |
538 | 3.33k | if (!LastInst.getOperand(1).isMBB()) |
539 | 0 | return true; |
540 | 3.33k | // Block ends with fall-through condbranch. |
541 | 3.33k | TBB = LastInst.getOperand(1).getMBB(); |
542 | 3.33k | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); |
543 | 3.33k | Cond.push_back(LastInst.getOperand(0)); |
544 | 3.33k | return false; |
545 | 212k | } else if (212k LastInst.getOpcode() == PPC::BDNZ8 || |
546 | 212k | LastInst.getOpcode() == PPC::BDNZ209k ) { |
547 | 3.11k | if (!LastInst.getOperand(0).isMBB()) |
548 | 0 | return true; |
549 | 3.11k | if (3.11k DisableCTRLoopAnal3.11k ) |
550 | 0 | return true; |
551 | 3.11k | TBB = LastInst.getOperand(0).getMBB(); |
552 | 3.11k | Cond.push_back(MachineOperand::CreateImm(1)); |
553 | 3.11k | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR82.97k : PPC::CTR144 , |
554 | 3.11k | true)); |
555 | 3.11k | return false; |
556 | 209k | } else if (209k LastInst.getOpcode() == PPC::BDZ8 || |
557 | 209k | LastInst.getOpcode() == PPC::BDZ209k ) { |
558 | 207 | if (!LastInst.getOperand(0).isMBB()) |
559 | 0 | return true; |
560 | 207 | if (207 DisableCTRLoopAnal207 ) |
561 | 0 | return true; |
562 | 207 | TBB = LastInst.getOperand(0).getMBB(); |
563 | 207 | Cond.push_back(MachineOperand::CreateImm(0)); |
564 | 207 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8173 : PPC::CTR34 , |
565 | 267k | true)); |
566 | 267k | return false; |
567 | 267k | } |
568 | 209k | |
569 | 209k | // Otherwise, don't know what this is. |
570 | 209k | return true; |
571 | 209k | } |
572 | 23.0k | |
573 | 23.0k | // Get the instruction before it if it's a terminator. |
574 | 23.0k | MachineInstr &SecondLastInst = *I; |
575 | 23.0k | |
576 | 23.0k | // If there are three terminators, we don't know what sort of block this is. |
577 | 23.0k | if (I != MBB.begin() && 23.0k isUnpredicatedTerminator(*--I)19.4k ) |
578 | 0 | return true; |
579 | 23.0k | |
580 | 23.0k | // If the block ends with PPC::B and PPC:BCC, handle it. |
581 | 23.0k | if (23.0k SecondLastInst.getOpcode() == PPC::BCC && |
582 | 23.0k | LastInst.getOpcode() == PPC::B14.7k ) { |
583 | 14.7k | if (!SecondLastInst.getOperand(2).isMBB() || |
584 | 14.7k | !LastInst.getOperand(0).isMBB()) |
585 | 0 | return true; |
586 | 14.7k | TBB = SecondLastInst.getOperand(2).getMBB(); |
587 | 14.7k | Cond.push_back(SecondLastInst.getOperand(0)); |
588 | 14.7k | Cond.push_back(SecondLastInst.getOperand(1)); |
589 | 14.7k | FBB = LastInst.getOperand(0).getMBB(); |
590 | 14.7k | return false; |
591 | 8.30k | } else if (8.30k SecondLastInst.getOpcode() == PPC::BC && |
592 | 8.30k | LastInst.getOpcode() == PPC::B3.76k ) { |
593 | 3.74k | if (!SecondLastInst.getOperand(1).isMBB() || |
594 | 3.74k | !LastInst.getOperand(0).isMBB()) |
595 | 0 | return true; |
596 | 3.74k | TBB = SecondLastInst.getOperand(1).getMBB(); |
597 | 3.74k | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); |
598 | 3.74k | Cond.push_back(SecondLastInst.getOperand(0)); |
599 | 3.74k | FBB = LastInst.getOperand(0).getMBB(); |
600 | 3.74k | return false; |
601 | 4.55k | } else if (4.55k SecondLastInst.getOpcode() == PPC::BCn && |
602 | 4.55k | LastInst.getOpcode() == PPC::B827 ) { |
603 | 827 | if (!SecondLastInst.getOperand(1).isMBB() || |
604 | 827 | !LastInst.getOperand(0).isMBB()) |
605 | 0 | return true; |
606 | 827 | TBB = SecondLastInst.getOperand(1).getMBB(); |
607 | 827 | Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); |
608 | 827 | Cond.push_back(SecondLastInst.getOperand(0)); |
609 | 827 | FBB = LastInst.getOperand(0).getMBB(); |
610 | 827 | return false; |
611 | 3.73k | } else if (3.73k (SecondLastInst.getOpcode() == PPC::BDNZ8 || |
612 | 1.41k | SecondLastInst.getOpcode() == PPC::BDNZ) && |
613 | 3.73k | LastInst.getOpcode() == PPC::B2.42k ) { |
614 | 2.42k | if (!SecondLastInst.getOperand(0).isMBB() || |
615 | 2.42k | !LastInst.getOperand(0).isMBB()) |
616 | 0 | return true; |
617 | 2.42k | if (2.42k DisableCTRLoopAnal2.42k ) |
618 | 0 | return true; |
619 | 2.42k | TBB = SecondLastInst.getOperand(0).getMBB(); |
620 | 2.42k | Cond.push_back(MachineOperand::CreateImm(1)); |
621 | 2.42k | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR82.31k : PPC::CTR115 , |
622 | 2.42k | true)); |
623 | 2.42k | FBB = LastInst.getOperand(0).getMBB(); |
624 | 2.42k | return false; |
625 | 1.30k | } else if (1.30k (SecondLastInst.getOpcode() == PPC::BDZ8 || |
626 | 1.18k | SecondLastInst.getOpcode() == PPC::BDZ) && |
627 | 1.30k | LastInst.getOpcode() == PPC::B139 ) { |
628 | 139 | if (!SecondLastInst.getOperand(0).isMBB() || |
629 | 139 | !LastInst.getOperand(0).isMBB()) |
630 | 0 | return true; |
631 | 139 | if (139 DisableCTRLoopAnal139 ) |
632 | 0 | return true; |
633 | 139 | TBB = SecondLastInst.getOperand(0).getMBB(); |
634 | 139 | Cond.push_back(MachineOperand::CreateImm(0)); |
635 | 139 | Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8116 : PPC::CTR23 , |
636 | 8.30k | true)); |
637 | 8.30k | FBB = LastInst.getOperand(0).getMBB(); |
638 | 8.30k | return false; |
639 | 8.30k | } |
640 | 1.16k | |
641 | 1.16k | // If the block ends with two PPC:Bs, handle it. The second one is not |
642 | 1.16k | // executed, so remove it. |
643 | 1.16k | if (1.16k SecondLastInst.getOpcode() == PPC::B && 1.16k LastInst.getOpcode() == PPC::B0 ) { |
644 | 0 | if (!SecondLastInst.getOperand(0).isMBB()) |
645 | 0 | return true; |
646 | 0 | TBB = SecondLastInst.getOperand(0).getMBB(); |
647 | 0 | I = LastInst; |
648 | 0 | if (AllowModify) |
649 | 0 | I->eraseFromParent(); |
650 | 0 | return false; |
651 | 0 | } |
652 | 1.16k | |
653 | 1.16k | // Otherwise, can't handle this. |
654 | 1.16k | return true; |
655 | 1.16k | } |
656 | | |
657 | | unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB, |
658 | 8.03k | int *BytesRemoved) const { |
659 | 8.03k | assert(!BytesRemoved && "code size not handled"); |
660 | 8.03k | |
661 | 8.03k | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
662 | 8.03k | if (I == MBB.end()) |
663 | 2 | return 0; |
664 | 8.03k | |
665 | 8.03k | if (8.03k I->getOpcode() != PPC::B && 8.03k I->getOpcode() != PPC::BCC6.14k && |
666 | 8.03k | I->getOpcode() != PPC::BC1.95k && I->getOpcode() != PPC::BCn1.01k && |
667 | 8.03k | I->getOpcode() != PPC::BDNZ8238 && I->getOpcode() != PPC::BDNZ210 && |
668 | 8.03k | I->getOpcode() != PPC::BDZ8204 && I->getOpcode() != PPC::BDZ175 ) |
669 | 169 | return 0; |
670 | 7.86k | |
671 | 7.86k | // Remove the branch. |
672 | 7.86k | I->eraseFromParent(); |
673 | 7.86k | |
674 | 7.86k | I = MBB.end(); |
675 | 7.86k | |
676 | 7.86k | if (I == MBB.begin()7.86k ) return 11.27k ; |
677 | 6.58k | --I; |
678 | 6.58k | if (I->getOpcode() != PPC::BCC && |
679 | 6.58k | I->getOpcode() != PPC::BC5.79k && I->getOpcode() != PPC::BCn5.72k && |
680 | 6.58k | I->getOpcode() != PPC::BDNZ85.67k && I->getOpcode() != PPC::BDNZ5.66k && |
681 | 6.58k | I->getOpcode() != PPC::BDZ85.66k && I->getOpcode() != PPC::BDZ5.66k ) |
682 | 5.66k | return 1; |
683 | 920 | |
684 | 920 | // Remove the branch. |
685 | 920 | I->eraseFromParent(); |
686 | 920 | return 2; |
687 | 920 | } |
688 | | |
689 | | unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, |
690 | | MachineBasicBlock *TBB, |
691 | | MachineBasicBlock *FBB, |
692 | | ArrayRef<MachineOperand> Cond, |
693 | | const DebugLoc &DL, |
694 | 8.25k | int *BytesAdded) const { |
695 | 8.25k | // Shouldn't be a fall through. |
696 | 8.25k | assert(TBB && "insertBranch must not be told to insert a fallthrough"); |
697 | 8.25k | assert((Cond.size() == 2 || Cond.size() == 0) && |
698 | 8.25k | "PPC branch conditions have two components!"); |
699 | 8.25k | assert(!BytesAdded && "code size not handled"); |
700 | 8.25k | |
701 | 8.25k | bool isPPC64 = Subtarget.isPPC64(); |
702 | 8.25k | |
703 | 8.25k | // One-way branch. |
704 | 8.25k | if (!FBB8.25k ) { |
705 | 8.10k | if (Cond.empty()) // Unconditional branch |
706 | 1.61k | BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); |
707 | 6.49k | else if (6.49k Cond[1].getReg() == PPC::CTR || 6.49k Cond[1].getReg() == PPC::CTR86.47k ) |
708 | 73 | BuildMI(&MBB, DL, get(Cond[0].getImm() ? |
709 | 39 | (isPPC64 ? 39 PPC::BDNZ834 : PPC::BDNZ5 ) : |
710 | 73 | (isPPC64 ? 34 PPC::BDZ828 : PPC::BDZ6 ))).addMBB(TBB); |
711 | 6.41k | else if (6.41k Cond[0].getImm() == PPC::PRED_BIT_SET6.41k ) |
712 | 792 | BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); |
713 | 5.62k | else if (5.62k Cond[0].getImm() == PPC::PRED_BIT_UNSET5.62k ) |
714 | 872 | BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); |
715 | 5.62k | else // Conditional branch |
716 | 4.75k | BuildMI(&MBB, DL, get(PPC::BCC)) |
717 | 4.75k | .addImm(Cond[0].getImm()) |
718 | 4.75k | .add(Cond[1]) |
719 | 4.75k | .addMBB(TBB); |
720 | 8.10k | return 1; |
721 | 8.10k | } |
722 | 149 | |
723 | 149 | // Two-way Conditional Branch. |
724 | 149 | if (149 Cond[1].getReg() == PPC::CTR || 149 Cond[1].getReg() == PPC::CTR8149 ) |
725 | 5 | BuildMI(&MBB, DL, get(Cond[0].getImm() ? |
726 | 5 | (isPPC64 ? 5 PPC::BDNZ85 : PPC::BDNZ0 ) : |
727 | 5 | (isPPC64 ? 0 PPC::BDZ80 : PPC::BDZ0 ))).addMBB(TBB); |
728 | 144 | else if (144 Cond[0].getImm() == PPC::PRED_BIT_SET144 ) |
729 | 21 | BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB); |
730 | 123 | else if (123 Cond[0].getImm() == PPC::PRED_BIT_UNSET123 ) |
731 | 32 | BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB); |
732 | 123 | else |
733 | 91 | BuildMI(&MBB, DL, get(PPC::BCC)) |
734 | 91 | .addImm(Cond[0].getImm()) |
735 | 91 | .add(Cond[1]) |
736 | 91 | .addMBB(TBB); |
737 | 8.25k | BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); |
738 | 8.25k | return 2; |
739 | 8.25k | } |
740 | | |
741 | | // Select analysis. |
742 | | bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, |
743 | | ArrayRef<MachineOperand> Cond, |
744 | | unsigned TrueReg, unsigned FalseReg, |
745 | 293 | int &CondCycles, int &TrueCycles, int &FalseCycles) const { |
746 | 293 | if (Cond.size() != 2) |
747 | 0 | return false; |
748 | 293 | |
749 | 293 | // If this is really a bdnz-like condition, then it cannot be turned into a |
750 | 293 | // select. |
751 | 293 | if (293 Cond[1].getReg() == PPC::CTR || 293 Cond[1].getReg() == PPC::CTR8293 ) |
752 | 0 | return false; |
753 | 293 | |
754 | 293 | // Check register classes. |
755 | 293 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
756 | 293 | const TargetRegisterClass *RC = |
757 | 293 | RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); |
758 | 293 | if (!RC) |
759 | 0 | return false; |
760 | 293 | |
761 | 293 | // isel is for regular integer GPRs only. |
762 | 293 | if (293 !PPC::GPRCRegClass.hasSubClassEq(RC) && |
763 | 266 | !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) && |
764 | 266 | !PPC::G8RCRegClass.hasSubClassEq(RC) && |
765 | 233 | !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) |
766 | 233 | return false; |
767 | 60 | |
768 | 60 | // FIXME: These numbers are for the A2, how well they work for other cores is |
769 | 60 | // an open question. On the A2, the isel instruction has a 2-cycle latency |
770 | 60 | // but single-cycle throughput. These numbers are used in combination with |
771 | 60 | // the MispredictPenalty setting from the active SchedMachineModel. |
772 | 60 | CondCycles = 1; |
773 | 60 | TrueCycles = 1; |
774 | 60 | FalseCycles = 1; |
775 | 60 | |
776 | 60 | return true; |
777 | 60 | } |
778 | | |
779 | | void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, |
780 | | MachineBasicBlock::iterator MI, |
781 | | const DebugLoc &dl, unsigned DestReg, |
782 | | ArrayRef<MachineOperand> Cond, unsigned TrueReg, |
783 | 713 | unsigned FalseReg) const { |
784 | 713 | assert(Cond.size() == 2 && |
785 | 713 | "PPC branch conditions have two components!"); |
786 | 713 | |
787 | 713 | // Get the register classes. |
788 | 713 | MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
789 | 713 | const TargetRegisterClass *RC = |
790 | 713 | RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); |
791 | 713 | assert(RC && "TrueReg and FalseReg must have overlapping register classes"); |
792 | 713 | |
793 | 713 | bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) || |
794 | 281 | PPC::G8RC_NOX0RegClass.hasSubClassEq(RC); |
795 | 713 | assert((Is64Bit || |
796 | 713 | PPC::GPRCRegClass.hasSubClassEq(RC) || |
797 | 713 | PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) && |
798 | 713 | "isel is for regular integer GPRs only"); |
799 | 713 | |
800 | 713 | unsigned OpCode = Is64Bit ? PPC::ISEL8432 : PPC::ISEL281 ; |
801 | 713 | auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm()); |
802 | 713 | |
803 | 713 | unsigned SubIdx = 0; |
804 | 713 | bool SwapOps = false; |
805 | 713 | switch (SelectPred) { |
806 | 16 | case PPC::PRED_EQ: |
807 | 16 | case PPC::PRED_EQ_MINUS: |
808 | 16 | case PPC::PRED_EQ_PLUS: |
809 | 16 | SubIdx = PPC::sub_eq; SwapOps = false; break; |
810 | 11 | case PPC::PRED_NE: |
811 | 11 | case PPC::PRED_NE_MINUS: |
812 | 11 | case PPC::PRED_NE_PLUS: |
813 | 11 | SubIdx = PPC::sub_eq; SwapOps = true; break; |
814 | 55 | case PPC::PRED_LT: |
815 | 55 | case PPC::PRED_LT_MINUS: |
816 | 55 | case PPC::PRED_LT_PLUS: |
817 | 55 | SubIdx = PPC::sub_lt; SwapOps = false; break; |
818 | 6 | case PPC::PRED_GE: |
819 | 6 | case PPC::PRED_GE_MINUS: |
820 | 6 | case PPC::PRED_GE_PLUS: |
821 | 6 | SubIdx = PPC::sub_lt; SwapOps = true; break; |
822 | 81 | case PPC::PRED_GT: |
823 | 81 | case PPC::PRED_GT_MINUS: |
824 | 81 | case PPC::PRED_GT_PLUS: |
825 | 81 | SubIdx = PPC::sub_gt; SwapOps = false; break; |
826 | 3 | case PPC::PRED_LE: |
827 | 3 | case PPC::PRED_LE_MINUS: |
828 | 3 | case PPC::PRED_LE_PLUS: |
829 | 3 | SubIdx = PPC::sub_gt; SwapOps = true; break; |
830 | 0 | case PPC::PRED_UN: |
831 | 0 | case PPC::PRED_UN_MINUS: |
832 | 0 | case PPC::PRED_UN_PLUS: |
833 | 0 | SubIdx = PPC::sub_un; SwapOps = false; break; |
834 | 0 | case PPC::PRED_NU: |
835 | 0 | case PPC::PRED_NU_MINUS: |
836 | 0 | case PPC::PRED_NU_PLUS: |
837 | 0 | SubIdx = PPC::sub_un; SwapOps = true; break; |
838 | 541 | case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; |
839 | 0 | case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; |
840 | 713 | } |
841 | 713 | |
842 | 713 | unsigned FirstReg = SwapOps ? 713 FalseReg20 : TrueReg693 , |
843 | 713 | SecondReg = SwapOps ? TrueReg20 : FalseReg693 ; |
844 | 713 | |
845 | 713 | // The first input register of isel cannot be r0. If it is a member |
846 | 713 | // of a register class that can be r0, then copy it first (the |
847 | 713 | // register allocator should eliminate the copy). |
848 | 713 | if (MRI.getRegClass(FirstReg)->contains(PPC::R0) || |
849 | 713 | MRI.getRegClass(FirstReg)->contains(PPC::X0)709 ) { |
850 | 10 | const TargetRegisterClass *FirstRC = |
851 | 10 | MRI.getRegClass(FirstReg)->contains(PPC::X0) ? |
852 | 10 | &PPC::G8RC_NOX0RegClass6 : &PPC::GPRC_NOR0RegClass4 ; |
853 | 10 | unsigned OldFirstReg = FirstReg; |
854 | 10 | FirstReg = MRI.createVirtualRegister(FirstRC); |
855 | 10 | BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) |
856 | 10 | .addReg(OldFirstReg); |
857 | 10 | } |
858 | 713 | |
859 | 713 | BuildMI(MBB, MI, dl, get(OpCode), DestReg) |
860 | 713 | .addReg(FirstReg).addReg(SecondReg) |
861 | 713 | .addReg(Cond[1].getReg(), 0, SubIdx); |
862 | 713 | } |
863 | | |
864 | 1 | static unsigned getCRBitValue(unsigned CRBit) { |
865 | 1 | unsigned Ret = 4; |
866 | 1 | if (CRBit == PPC::CR0LT || 1 CRBit == PPC::CR1LT1 || |
867 | 1 | CRBit == PPC::CR2LT1 || CRBit == PPC::CR3LT1 || |
868 | 1 | CRBit == PPC::CR4LT1 || CRBit == PPC::CR5LT1 || |
869 | 1 | CRBit == PPC::CR6LT1 || CRBit == PPC::CR7LT1 ) |
870 | 0 | Ret = 3; |
871 | 1 | if (CRBit == PPC::CR0GT || 1 CRBit == PPC::CR1GT1 || |
872 | 1 | CRBit == PPC::CR2GT1 || CRBit == PPC::CR3GT1 || |
873 | 1 | CRBit == PPC::CR4GT1 || CRBit == PPC::CR5GT1 || |
874 | 1 | CRBit == PPC::CR6GT1 || CRBit == PPC::CR7GT1 ) |
875 | 0 | Ret = 2; |
876 | 1 | if (CRBit == PPC::CR0EQ || 1 CRBit == PPC::CR1EQ0 || |
877 | 1 | CRBit == PPC::CR2EQ0 || CRBit == PPC::CR3EQ0 || |
878 | 1 | CRBit == PPC::CR4EQ0 || CRBit == PPC::CR5EQ0 || |
879 | 1 | CRBit == PPC::CR6EQ0 || CRBit == PPC::CR7EQ0 ) |
880 | 1 | Ret = 1; |
881 | 1 | if (CRBit == PPC::CR0UN || 1 CRBit == PPC::CR1UN1 || |
882 | 1 | CRBit == PPC::CR2UN1 || CRBit == PPC::CR3UN1 || |
883 | 1 | CRBit == PPC::CR4UN1 || CRBit == PPC::CR5UN1 || |
884 | 1 | CRBit == PPC::CR6UN1 || CRBit == PPC::CR7UN1 ) |
885 | 0 | Ret = 0; |
886 | 1 | |
887 | 1 | assert(Ret != 4 && "Invalid CR bit register"); |
888 | 1 | return Ret; |
889 | 1 | } |
890 | | |
891 | | void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
892 | | MachineBasicBlock::iterator I, |
893 | | const DebugLoc &DL, unsigned DestReg, |
894 | 3.33k | unsigned SrcReg, bool KillSrc) const { |
895 | 3.33k | // We can end up with self copies and similar things as a result of VSX copy |
896 | 3.33k | // legalization. Promote them here. |
897 | 3.33k | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
898 | 3.33k | if (PPC::F8RCRegClass.contains(DestReg) && |
899 | 3.33k | PPC::VSRCRegClass.contains(SrcReg)1.03k ) { |
900 | 0 | unsigned SuperReg = |
901 | 0 | TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); |
902 | 0 |
|
903 | 0 | if (VSXSelfCopyCrash && 0 SrcReg == SuperReg0 ) |
904 | 0 | llvm_unreachable("nop VSX copy"); |
905 | 0 |
|
906 | 0 | DestReg = SuperReg; |
907 | 3.33k | } else if (3.33k PPC::F8RCRegClass.contains(SrcReg) && |
908 | 3.33k | PPC::VSRCRegClass.contains(DestReg)1.02k ) { |
909 | 0 | unsigned SuperReg = |
910 | 0 | TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); |
911 | 0 |
|
912 | 0 | if (VSXSelfCopyCrash && 0 DestReg == SuperReg0 ) |
913 | 0 | llvm_unreachable("nop VSX copy"); |
914 | 0 |
|
915 | 0 | SrcReg = SuperReg; |
916 | 0 | } |
917 | 3.33k | |
918 | 3.33k | // Different class register copy |
919 | 3.33k | if (3.33k PPC::CRBITRCRegClass.contains(SrcReg) && |
920 | 3.33k | PPC::GPRCRegClass.contains(DestReg)27 ) { |
921 | 1 | unsigned CRReg = getCRFromCRBit(SrcReg); |
922 | 1 | BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg); |
923 | 1 | getKillRegState(KillSrc); |
924 | 1 | // Rotate the CR bit in the CR fields to be the least significant bit and |
925 | 1 | // then mask with 0x1 (MB = ME = 31). |
926 | 1 | BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) |
927 | 1 | .addReg(DestReg, RegState::Kill) |
928 | 1 | .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) |
929 | 1 | .addImm(31) |
930 | 1 | .addImm(31); |
931 | 1 | return; |
932 | 3.33k | } else if (3.33k PPC::CRRCRegClass.contains(SrcReg) && |
933 | 3.33k | PPC::G8RCRegClass.contains(DestReg)10 ) { |
934 | 0 | BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg).addReg(SrcReg); |
935 | 0 | getKillRegState(KillSrc); |
936 | 0 | return; |
937 | 3.33k | } else if (3.33k PPC::CRRCRegClass.contains(SrcReg) && |
938 | 3.33k | PPC::GPRCRegClass.contains(DestReg)10 ) { |
939 | 1 | BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg); |
940 | 1 | getKillRegState(KillSrc); |
941 | 1 | return; |
942 | 3.33k | } else if (3.33k PPC::G8RCRegClass.contains(SrcReg) && |
943 | 3.33k | PPC::VSFRCRegClass.contains(DestReg)1.31k ) { |
944 | 1 | BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg); |
945 | 1 | NumGPRtoVSRSpill++; |
946 | 1 | getKillRegState(KillSrc); |
947 | 1 | return; |
948 | 3.33k | } else if (3.33k PPC::VSFRCRegClass.contains(SrcReg) && |
949 | 3.33k | PPC::G8RCRegClass.contains(DestReg)1.03k ) { |
950 | 2 | BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg); |
951 | 2 | getKillRegState(KillSrc); |
952 | 2 | return; |
953 | 2 | } |
954 | 3.32k | |
955 | 3.32k | unsigned Opc; |
956 | 3.32k | if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) |
957 | 440 | Opc = PPC::OR; |
958 | 2.88k | else if (2.88k PPC::G8RCRegClass.contains(DestReg, SrcReg)2.88k ) |
959 | 1.31k | Opc = PPC::OR8; |
960 | 1.57k | else if (1.57k PPC::F4RCRegClass.contains(DestReg, SrcReg)1.57k ) |
961 | 1.01k | Opc = PPC::FMR; |
962 | 560 | else if (560 PPC::CRRCRegClass.contains(DestReg, SrcReg)560 ) |
963 | 9 | Opc = PPC::MCRF; |
964 | 551 | else if (551 PPC::VRRCRegClass.contains(DestReg, SrcReg)551 ) |
965 | 347 | Opc = PPC::VOR; |
966 | 204 | else if (204 PPC::VSRCRegClass.contains(DestReg, SrcReg)204 ) |
967 | 204 | // There are two different ways this can be done: |
968 | 204 | // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only |
969 | 204 | // issue in VSU pipeline 0. |
970 | 204 | // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but |
971 | 204 | // can go to either pipeline. |
972 | 204 | // We'll always use xxlor here, because in practically all cases where |
973 | 204 | // copies are generated, they are close enough to some use that the |
974 | 204 | // lower-latency form is preferable. |
975 | 38 | Opc = PPC::XXLOR; |
976 | 166 | else if (166 PPC::VSFRCRegClass.contains(DestReg, SrcReg) || |
977 | 146 | PPC::VSSRCRegClass.contains(DestReg, SrcReg)) |
978 | 20 | Opc = PPC::XXLORf; |
979 | 146 | else if (146 PPC::QFRCRegClass.contains(DestReg, SrcReg)146 ) |
980 | 120 | Opc = PPC::QVFMR; |
981 | 26 | else if (26 PPC::QSRCRegClass.contains(DestReg, SrcReg)26 ) |
982 | 0 | Opc = PPC::QVFMRs; |
983 | 26 | else if (26 PPC::QBRCRegClass.contains(DestReg, SrcReg)26 ) |
984 | 0 | Opc = PPC::QVFMRb; |
985 | 26 | else if (26 PPC::CRBITRCRegClass.contains(DestReg, SrcReg)26 ) |
986 | 26 | Opc = PPC::CROR; |
987 | 26 | else |
988 | 0 | llvm_unreachable("Impossible reg-to-reg copy"); |
989 | 3.32k | |
990 | 3.32k | const MCInstrDesc &MCID = get(Opc); |
991 | 3.32k | if (MCID.getNumOperands() == 3) |
992 | 2.18k | BuildMI(MBB, I, DL, MCID, DestReg) |
993 | 2.18k | .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); |
994 | 3.32k | else |
995 | 1.14k | BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); |
996 | 3.33k | } |
997 | | |
998 | | // This function returns true if a CR spill is necessary and false otherwise. |
999 | | bool |
1000 | | PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, |
1001 | | unsigned SrcReg, bool isKill, |
1002 | | int FrameIdx, |
1003 | | const TargetRegisterClass *RC, |
1004 | | SmallVectorImpl<MachineInstr*> &NewMIs, |
1005 | 2.09k | bool &NonRI, bool &SpillsVRS) const{ |
1006 | 2.09k | // Note: If additional store instructions are added here, |
1007 | 2.09k | // update isStoreToStackSlot. |
1008 | 2.09k | |
1009 | 2.09k | DebugLoc DL; |
1010 | 2.09k | if (PPC::GPRCRegClass.hasSubClassEq(RC) || |
1011 | 2.09k | PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)1.84k ) { |
1012 | 254 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) |
1013 | 254 | .addReg(SrcReg, |
1014 | 254 | getKillRegState(isKill)), |
1015 | 254 | FrameIdx)); |
1016 | 2.09k | } else if (1.84k PPC::G8RCRegClass.hasSubClassEq(RC) || |
1017 | 1.84k | PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)879 ) { |
1018 | 964 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) |
1019 | 964 | .addReg(SrcReg, |
1020 | 964 | getKillRegState(isKill)), |
1021 | 964 | FrameIdx)); |
1022 | 1.84k | } else if (878 PPC::F8RCRegClass.hasSubClassEq(RC)878 ) { |
1023 | 625 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) |
1024 | 625 | .addReg(SrcReg, |
1025 | 625 | getKillRegState(isKill)), |
1026 | 625 | FrameIdx)); |
1027 | 878 | } else if (253 PPC::F4RCRegClass.hasSubClassEq(RC)253 ) { |
1028 | 9 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) |
1029 | 9 | .addReg(SrcReg, |
1030 | 9 | getKillRegState(isKill)), |
1031 | 9 | FrameIdx)); |
1032 | 253 | } else if (244 PPC::CRRCRegClass.hasSubClassEq(RC)244 ) { |
1033 | 39 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) |
1034 | 39 | .addReg(SrcReg, |
1035 | 39 | getKillRegState(isKill)), |
1036 | 39 | FrameIdx)); |
1037 | 39 | return true; |
1038 | 205 | } else if (205 PPC::CRBITRCRegClass.hasSubClassEq(RC)205 ) { |
1039 | 8 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT)) |
1040 | 8 | .addReg(SrcReg, |
1041 | 8 | getKillRegState(isKill)), |
1042 | 8 | FrameIdx)); |
1043 | 8 | return true; |
1044 | 197 | } else if (197 PPC::VRRCRegClass.hasSubClassEq(RC)197 ) { |
1045 | 1 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) |
1046 | 1 | .addReg(SrcReg, |
1047 | 1 | getKillRegState(isKill)), |
1048 | 1 | FrameIdx)); |
1049 | 1 | NonRI = true; |
1050 | 197 | } else if (196 PPC::VSRCRegClass.hasSubClassEq(RC)196 ) { |
1051 | 157 | unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX24 : PPC::STXVD2X133 ; |
1052 | 157 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op)) |
1053 | 157 | .addReg(SrcReg, |
1054 | 157 | getKillRegState(isKill)), |
1055 | 157 | FrameIdx)); |
1056 | 157 | NonRI = true; |
1057 | 196 | } else if (39 PPC::VSFRCRegClass.hasSubClassEq(RC)39 ) { |
1058 | 19 | unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf644 : PPC::STXSDX15 ; |
1059 | 19 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc)) |
1060 | 19 | .addReg(SrcReg, |
1061 | 19 | getKillRegState(isKill)), |
1062 | 19 | FrameIdx)); |
1063 | 19 | NonRI = true; |
1064 | 39 | } else if (20 PPC::VSSRCRegClass.hasSubClassEq(RC)20 ) { |
1065 | 0 | unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf320 : PPC::STXSSPX0 ; |
1066 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc)) |
1067 | 0 | .addReg(SrcReg, |
1068 | 0 | getKillRegState(isKill)), |
1069 | 0 | FrameIdx)); |
1070 | 0 | NonRI = true; |
1071 | 20 | } else if (20 PPC::VRSAVERCRegClass.hasSubClassEq(RC)20 ) { |
1072 | 0 | assert(Subtarget.isDarwin() && |
1073 | 0 | "VRSAVE only needs spill/restore on Darwin"); |
1074 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) |
1075 | 0 | .addReg(SrcReg, |
1076 | 0 | getKillRegState(isKill)), |
1077 | 0 | FrameIdx)); |
1078 | 0 | SpillsVRS = true; |
1079 | 20 | } else if (20 PPC::QFRCRegClass.hasSubClassEq(RC)20 ) { |
1080 | 12 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX)) |
1081 | 12 | .addReg(SrcReg, |
1082 | 12 | getKillRegState(isKill)), |
1083 | 12 | FrameIdx)); |
1084 | 12 | NonRI = true; |
1085 | 20 | } else if (8 PPC::QSRCRegClass.hasSubClassEq(RC)8 ) { |
1086 | 8 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs)) |
1087 | 8 | .addReg(SrcReg, |
1088 | 8 | getKillRegState(isKill)), |
1089 | 8 | FrameIdx)); |
1090 | 8 | NonRI = true; |
1091 | 8 | } else if (0 PPC::QBRCRegClass.hasSubClassEq(RC)0 ) { |
1092 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb)) |
1093 | 0 | .addReg(SrcReg, |
1094 | 0 | getKillRegState(isKill)), |
1095 | 0 | FrameIdx)); |
1096 | 0 | NonRI = true; |
1097 | 0 | } else if (0 PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)0 ) { |
1098 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_ST)) |
1099 | 0 | .addReg(SrcReg, |
1100 | 0 | getKillRegState(isKill)), |
1101 | 0 | FrameIdx)); |
1102 | 0 | } else { |
1103 | 0 | llvm_unreachable("Unknown regclass!"); |
1104 | 1.84k | } |
1105 | 2.04k | |
1106 | 2.04k | return false; |
1107 | 2.04k | } |
1108 | | |
1109 | | void |
1110 | | PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
1111 | | MachineBasicBlock::iterator MI, |
1112 | | unsigned SrcReg, bool isKill, int FrameIdx, |
1113 | | const TargetRegisterClass *RC, |
1114 | 2.09k | const TargetRegisterInfo *TRI) const { |
1115 | 2.09k | MachineFunction &MF = *MBB.getParent(); |
1116 | 2.09k | SmallVector<MachineInstr*, 4> NewMIs; |
1117 | 2.09k | |
1118 | 2.09k | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
1119 | 2.09k | FuncInfo->setHasSpills(); |
1120 | 2.09k | |
1121 | 2.09k | // We need to avoid a situation in which the value from a VRRC register is |
1122 | 2.09k | // spilled using an Altivec instruction and reloaded into a VSRC register |
1123 | 2.09k | // using a VSX instruction. The issue with this is that the VSX |
1124 | 2.09k | // load/store instructions swap the doublewords in the vector and the Altivec |
1125 | 2.09k | // ones don't. The register classes on the spill/reload may be different if |
1126 | 2.09k | // the register is defined using an Altivec instruction and is then used by a |
1127 | 2.09k | // VSX instruction. |
1128 | 2.09k | RC = updatedRC(RC); |
1129 | 2.09k | |
1130 | 2.09k | bool NonRI = false, SpillsVRS = false; |
1131 | 2.09k | if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, |
1132 | 2.09k | NonRI, SpillsVRS)) |
1133 | 47 | FuncInfo->setSpillsCR(); |
1134 | 2.09k | |
1135 | 2.09k | if (SpillsVRS) |
1136 | 0 | FuncInfo->setSpillsVRSAVE(); |
1137 | 2.09k | |
1138 | 2.09k | if (NonRI) |
1139 | 197 | FuncInfo->setHasNonRISpills(); |
1140 | 2.09k | |
1141 | 4.19k | for (unsigned i = 0, e = NewMIs.size(); i != e4.19k ; ++i2.09k ) |
1142 | 2.09k | MBB.insert(MI, NewMIs[i]); |
1143 | 2.09k | |
1144 | 2.09k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1145 | 2.09k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1146 | 2.09k | MachinePointerInfo::getFixedStack(MF, FrameIdx), |
1147 | 2.09k | MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), |
1148 | 2.09k | MFI.getObjectAlignment(FrameIdx)); |
1149 | 2.09k | NewMIs.back()->addMemOperand(MF, MMO); |
1150 | 2.09k | } |
1151 | | |
1152 | | bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, |
1153 | | unsigned DestReg, int FrameIdx, |
1154 | | const TargetRegisterClass *RC, |
1155 | | SmallVectorImpl<MachineInstr *> &NewMIs, |
1156 | 2.07k | bool &NonRI, bool &SpillsVRS) const { |
1157 | 2.07k | // Note: If additional load instructions are added here, |
1158 | 2.07k | // update isLoadFromStackSlot. |
1159 | 2.07k | |
1160 | 2.07k | if (PPC::GPRCRegClass.hasSubClassEq(RC) || |
1161 | 2.07k | PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)1.78k ) { |
1162 | 284 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), |
1163 | 284 | DestReg), FrameIdx)); |
1164 | 2.07k | } else if (1.78k PPC::G8RCRegClass.hasSubClassEq(RC) || |
1165 | 1.78k | PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)860 ) { |
1166 | 927 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), |
1167 | 927 | FrameIdx)); |
1168 | 1.78k | } else if (859 PPC::F8RCRegClass.hasSubClassEq(RC)859 ) { |
1169 | 614 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), |
1170 | 614 | FrameIdx)); |
1171 | 859 | } else if (245 PPC::F4RCRegClass.hasSubClassEq(RC)245 ) { |
1172 | 3 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), |
1173 | 3 | FrameIdx)); |
1174 | 245 | } else if (242 PPC::CRRCRegClass.hasSubClassEq(RC)242 ) { |
1175 | 55 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, |
1176 | 55 | get(PPC::RESTORE_CR), DestReg), |
1177 | 55 | FrameIdx)); |
1178 | 55 | return true; |
1179 | 187 | } else if (187 PPC::CRBITRCRegClass.hasSubClassEq(RC)187 ) { |
1180 | 8 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, |
1181 | 8 | get(PPC::RESTORE_CRBIT), DestReg), |
1182 | 8 | FrameIdx)); |
1183 | 8 | return true; |
1184 | 179 | } else if (179 PPC::VRRCRegClass.hasSubClassEq(RC)179 ) { |
1185 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), |
1186 | 0 | FrameIdx)); |
1187 | 0 | NonRI = true; |
1188 | 179 | } else if (179 PPC::VSRCRegClass.hasSubClassEq(RC)179 ) { |
1189 | 134 | unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX24 : PPC::LXVD2X110 ; |
1190 | 134 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg), |
1191 | 134 | FrameIdx)); |
1192 | 134 | NonRI = true; |
1193 | 179 | } else if (45 PPC::VSFRCRegClass.hasSubClassEq(RC)45 ) { |
1194 | 18 | unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf644 : PPC::LXSDX14 ; |
1195 | 18 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc), |
1196 | 18 | DestReg), FrameIdx)); |
1197 | 18 | NonRI = true; |
1198 | 45 | } else if (27 PPC::VSSRCRegClass.hasSubClassEq(RC)27 ) { |
1199 | 0 | unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf320 : PPC::LXSSPX0 ; |
1200 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc), |
1201 | 0 | DestReg), FrameIdx)); |
1202 | 0 | NonRI = true; |
1203 | 27 | } else if (27 PPC::VRSAVERCRegClass.hasSubClassEq(RC)27 ) { |
1204 | 0 | assert(Subtarget.isDarwin() && |
1205 | 0 | "VRSAVE only needs spill/restore on Darwin"); |
1206 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, |
1207 | 0 | get(PPC::RESTORE_VRSAVE), |
1208 | 0 | DestReg), |
1209 | 0 | FrameIdx)); |
1210 | 0 | SpillsVRS = true; |
1211 | 27 | } else if (27 PPC::QFRCRegClass.hasSubClassEq(RC)27 ) { |
1212 | 15 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg), |
1213 | 15 | FrameIdx)); |
1214 | 15 | NonRI = true; |
1215 | 27 | } else if (12 PPC::QSRCRegClass.hasSubClassEq(RC)12 ) { |
1216 | 12 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg), |
1217 | 12 | FrameIdx)); |
1218 | 12 | NonRI = true; |
1219 | 12 | } else if (0 PPC::QBRCRegClass.hasSubClassEq(RC)0 ) { |
1220 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg), |
1221 | 0 | FrameIdx)); |
1222 | 0 | NonRI = true; |
1223 | 0 | } else if (0 PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)0 ) { |
1224 | 0 | NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_LD), |
1225 | 0 | DestReg), FrameIdx)); |
1226 | 0 | } else { |
1227 | 0 | llvm_unreachable("Unknown regclass!"); |
1228 | 1.78k | } |
1229 | 2.00k | |
1230 | 2.00k | return false; |
1231 | 2.00k | } |
1232 | | |
1233 | | void |
1234 | | PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
1235 | | MachineBasicBlock::iterator MI, |
1236 | | unsigned DestReg, int FrameIdx, |
1237 | | const TargetRegisterClass *RC, |
1238 | 2.07k | const TargetRegisterInfo *TRI) const { |
1239 | 2.07k | MachineFunction &MF = *MBB.getParent(); |
1240 | 2.07k | SmallVector<MachineInstr*, 4> NewMIs; |
1241 | 2.07k | DebugLoc DL; |
1242 | 2.07k | if (MI != MBB.end()2.07k ) DL = MI->getDebugLoc()2.06k ; |
1243 | 2.07k | |
1244 | 2.07k | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
1245 | 2.07k | FuncInfo->setHasSpills(); |
1246 | 2.07k | |
1247 | 2.07k | // We need to avoid a situation in which the value from a VRRC register is |
1248 | 2.07k | // spilled using an Altivec instruction and reloaded into a VSRC register |
1249 | 2.07k | // using a VSX instruction. The issue with this is that the VSX |
1250 | 2.07k | // load/store instructions swap the doublewords in the vector and the Altivec |
1251 | 2.07k | // ones don't. The register classes on the spill/reload may be different if |
1252 | 2.07k | // the register is defined using an Altivec instruction and is then used by a |
1253 | 2.07k | // VSX instruction. |
1254 | 2.07k | if (Subtarget.hasVSX() && 2.07k RC == &PPC::VRRCRegClass1.04k ) |
1255 | 126 | RC = &PPC::VSRCRegClass; |
1256 | 2.07k | |
1257 | 2.07k | bool NonRI = false, SpillsVRS = false; |
1258 | 2.07k | if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, |
1259 | 2.07k | NonRI, SpillsVRS)) |
1260 | 63 | FuncInfo->setSpillsCR(); |
1261 | 2.07k | |
1262 | 2.07k | if (SpillsVRS) |
1263 | 0 | FuncInfo->setSpillsVRSAVE(); |
1264 | 2.07k | |
1265 | 2.07k | if (NonRI) |
1266 | 179 | FuncInfo->setHasNonRISpills(); |
1267 | 2.07k | |
1268 | 4.14k | for (unsigned i = 0, e = NewMIs.size(); i != e4.14k ; ++i2.07k ) |
1269 | 2.07k | MBB.insert(MI, NewMIs[i]); |
1270 | 2.07k | |
1271 | 2.07k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1272 | 2.07k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1273 | 2.07k | MachinePointerInfo::getFixedStack(MF, FrameIdx), |
1274 | 2.07k | MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), |
1275 | 2.07k | MFI.getObjectAlignment(FrameIdx)); |
1276 | 2.07k | NewMIs.back()->addMemOperand(MF, MMO); |
1277 | 2.07k | } |
1278 | | |
1279 | | bool PPCInstrInfo:: |
1280 | 10.3k | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
1281 | 10.3k | assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); |
1282 | 10.3k | if (Cond[1].getReg() == PPC::CTR8 || 10.3k Cond[1].getReg() == PPC::CTR10.1k ) |
1283 | 203 | Cond[0].setImm(Cond[0].getImm() == 0 ? 203 151 : 0152 ); |
1284 | 10.3k | else |
1285 | 10.3k | // Leave the CR# the same, but invert the condition. |
1286 | 10.1k | Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); |
1287 | 10.3k | return false; |
1288 | 10.3k | } |
1289 | | |
1290 | | bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
1291 | 3.15k | unsigned Reg, MachineRegisterInfo *MRI) const { |
1292 | 3.15k | // For some instructions, it is legal to fold ZERO into the RA register field. |
1293 | 3.15k | // A zero immediate should always be loaded with a single li. |
1294 | 3.15k | unsigned DefOpc = DefMI.getOpcode(); |
1295 | 3.15k | if (DefOpc != PPC::LI && 3.15k DefOpc != PPC::LI82.25k ) |
1296 | 419 | return false; |
1297 | 2.73k | if (2.73k !DefMI.getOperand(1).isImm()2.73k ) |
1298 | 25 | return false; |
1299 | 2.70k | if (2.70k DefMI.getOperand(1).getImm() != 02.70k ) |
1300 | 1.31k | return false; |
1301 | 1.39k | |
1302 | 1.39k | // Note that we cannot here invert the arguments of an isel in order to fold |
1303 | 1.39k | // a ZERO into what is presented as the second argument. All we have here |
1304 | 1.39k | // is the condition bit, and that might come from a CR-logical bit operation. |
1305 | 1.39k | |
1306 | 1.39k | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
1307 | 1.39k | |
1308 | 1.39k | // Only fold into real machine instructions. |
1309 | 1.39k | if (UseMCID.isPseudo()) |
1310 | 395 | return false; |
1311 | 1.00k | |
1312 | 1.00k | unsigned UseIdx; |
1313 | 1.80k | for (UseIdx = 0; UseIdx < UseMI.getNumOperands()1.80k ; ++UseIdx802 ) |
1314 | 1.80k | if (1.80k UseMI.getOperand(UseIdx).isReg() && |
1315 | 1.80k | UseMI.getOperand(UseIdx).getReg() == Reg) |
1316 | 1.00k | break; |
1317 | 1.00k | |
1318 | 1.00k | assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI"); |
1319 | 1.00k | assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg"); |
1320 | 1.00k | |
1321 | 1.00k | const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; |
1322 | 1.00k | |
1323 | 1.00k | // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0 |
1324 | 1.00k | // register (which might also be specified as a pointer class kind). |
1325 | 1.00k | if (UseInfo->isLookupPtrRegClass()1.00k ) { |
1326 | 16 | if (UseInfo->RegClass /* Kind */ != 1) |
1327 | 16 | return false; |
1328 | 984 | } else { |
1329 | 984 | if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID && |
1330 | 919 | UseInfo->RegClass != PPC::G8RC_NOX0RegClassID) |
1331 | 789 | return false; |
1332 | 195 | } |
1333 | 195 | |
1334 | 195 | // Make sure this is not tied to an output register (or otherwise |
1335 | 195 | // constrained). This is true for ST?UX registers, for example, which |
1336 | 195 | // are tied to their output registers. |
1337 | 195 | if (195 UseInfo->Constraints != 0195 ) |
1338 | 0 | return false; |
1339 | 195 | |
1340 | 195 | unsigned ZeroReg; |
1341 | 195 | if (UseInfo->isLookupPtrRegClass()195 ) { |
1342 | 0 | bool isPPC64 = Subtarget.isPPC64(); |
1343 | 0 | ZeroReg = isPPC64 ? PPC::ZERO80 : PPC::ZERO0 ; |
1344 | 195 | } else { |
1345 | 195 | ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? |
1346 | 195 | PPC::ZERO8130 : PPC::ZERO65 ; |
1347 | 195 | } |
1348 | 195 | |
1349 | 195 | bool DeleteDef = MRI->hasOneNonDBGUse(Reg); |
1350 | 195 | UseMI.getOperand(UseIdx).setReg(ZeroReg); |
1351 | 195 | |
1352 | 195 | if (DeleteDef) |
1353 | 186 | DefMI.eraseFromParent(); |
1354 | 3.15k | |
1355 | 3.15k | return true; |
1356 | 3.15k | } |
1357 | | |
1358 | 0 | static bool MBBDefinesCTR(MachineBasicBlock &MBB) { |
1359 | 0 | for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); |
1360 | 0 | I != IE0 ; ++I0 ) |
1361 | 0 | if (0 I->definesRegister(PPC::CTR) || 0 I->definesRegister(PPC::CTR8)0 ) |
1362 | 0 | return true; |
1363 | 0 | return false; |
1364 | 0 | } |
1365 | | |
1366 | | // We should make sure that, if we're going to predicate both sides of a |
1367 | | // condition (a diamond), that both sides don't define the counter register. We |
1368 | | // can predicate counter-decrement-based branches, but while that predicates |
1369 | | // the branching, it does not predicate the counter decrement. If we tried to |
1370 | | // merge the triangle into one predicated block, we'd decrement the counter |
1371 | | // twice. |
1372 | | bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, |
1373 | | unsigned NumT, unsigned ExtraT, |
1374 | | MachineBasicBlock &FMBB, |
1375 | | unsigned NumF, unsigned ExtraF, |
1376 | 0 | BranchProbability Probability) const { |
1377 | 0 | return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); |
1378 | 0 | } |
1379 | | |
1380 | | |
1381 | 581k | bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { |
1382 | 581k | // The predicated branches are identified by their type, not really by the |
1383 | 581k | // explicit presence of a predicate. Furthermore, some of them can be |
1384 | 581k | // predicated more than once. Because if conversion won't try to predicate |
1385 | 581k | // any instruction which already claims to be predicated (by returning true |
1386 | 581k | // here), always return false. In doing so, we let isPredicable() be the |
1387 | 581k | // final word on whether not the instruction can be (further) predicated. |
1388 | 581k | |
1389 | 581k | return false; |
1390 | 581k | } |
1391 | | |
1392 | 651k | bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { |
1393 | 651k | if (!MI.isTerminator()) |
1394 | 322k | return false; |
1395 | 329k | |
1396 | 329k | // Conditional branch is a special case. |
1397 | 329k | if (329k MI.isBranch() && 329k !MI.isBarrier()122k ) |
1398 | 83.7k | return true; |
1399 | 245k | |
1400 | 245k | return !isPredicated(MI); |
1401 | 245k | } |
1402 | | |
1403 | | bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, |
1404 | 209 | ArrayRef<MachineOperand> Pred) const { |
1405 | 209 | unsigned OpC = MI.getOpcode(); |
1406 | 209 | if (OpC == PPC::BLR || 209 OpC == PPC::BLR8202 ) { |
1407 | 209 | if (Pred[1].getReg() == PPC::CTR8 || 209 Pred[1].getReg() == PPC::CTR207 ) { |
1408 | 3 | bool isPPC64 = Subtarget.isPPC64(); |
1409 | 0 | MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? 0 PPC::BDNZLR80 : PPC::BDNZLR0 ) |
1410 | 3 | : (isPPC64 ? 3 PPC::BDZLR82 : PPC::BDZLR1 ))); |
1411 | 209 | } else if (206 Pred[0].getImm() == PPC::PRED_BIT_SET206 ) { |
1412 | 70 | MI.setDesc(get(PPC::BCLR)); |
1413 | 70 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1414 | 70 | .addReg(Pred[1].getReg()); |
1415 | 206 | } else if (136 Pred[0].getImm() == PPC::PRED_BIT_UNSET136 ) { |
1416 | 5 | MI.setDesc(get(PPC::BCLRn)); |
1417 | 5 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1418 | 5 | .addReg(Pred[1].getReg()); |
1419 | 136 | } else { |
1420 | 131 | MI.setDesc(get(PPC::BCCLR)); |
1421 | 131 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1422 | 131 | .addImm(Pred[0].getImm()) |
1423 | 131 | .addReg(Pred[1].getReg()); |
1424 | 131 | } |
1425 | 209 | |
1426 | 209 | return true; |
1427 | 0 | } else if (0 OpC == PPC::B0 ) { |
1428 | 0 | if (Pred[1].getReg() == PPC::CTR8 || 0 Pred[1].getReg() == PPC::CTR0 ) { |
1429 | 0 | bool isPPC64 = Subtarget.isPPC64(); |
1430 | 0 | MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? 0 PPC::BDNZ80 : PPC::BDNZ0 ) |
1431 | 0 | : (isPPC64 ? 0 PPC::BDZ80 : PPC::BDZ0 ))); |
1432 | 0 | } else if (0 Pred[0].getImm() == PPC::PRED_BIT_SET0 ) { |
1433 | 0 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); |
1434 | 0 | MI.RemoveOperand(0); |
1435 | 0 |
|
1436 | 0 | MI.setDesc(get(PPC::BC)); |
1437 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1438 | 0 | .addReg(Pred[1].getReg()) |
1439 | 0 | .addMBB(MBB); |
1440 | 0 | } else if (0 Pred[0].getImm() == PPC::PRED_BIT_UNSET0 ) { |
1441 | 0 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); |
1442 | 0 | MI.RemoveOperand(0); |
1443 | 0 |
|
1444 | 0 | MI.setDesc(get(PPC::BCn)); |
1445 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1446 | 0 | .addReg(Pred[1].getReg()) |
1447 | 0 | .addMBB(MBB); |
1448 | 0 | } else { |
1449 | 0 | MachineBasicBlock *MBB = MI.getOperand(0).getMBB(); |
1450 | 0 | MI.RemoveOperand(0); |
1451 | 0 |
|
1452 | 0 | MI.setDesc(get(PPC::BCC)); |
1453 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1454 | 0 | .addImm(Pred[0].getImm()) |
1455 | 0 | .addReg(Pred[1].getReg()) |
1456 | 0 | .addMBB(MBB); |
1457 | 0 | } |
1458 | 0 |
|
1459 | 0 | return true; |
1460 | 0 | } else if (0 OpC == PPC::BCTR || 0 OpC == PPC::BCTR80 || |
1461 | 0 | OpC == PPC::BCTRL0 || OpC == PPC::BCTRL80 ) { |
1462 | 0 | if (Pred[1].getReg() == PPC::CTR8 || 0 Pred[1].getReg() == PPC::CTR0 ) |
1463 | 0 | llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); |
1464 | 0 |
|
1465 | 0 | bool setLR = OpC == PPC::BCTRL || 0 OpC == PPC::BCTRL80 ; |
1466 | 0 | bool isPPC64 = Subtarget.isPPC64(); |
1467 | 0 |
|
1468 | 0 | if (Pred[0].getImm() == PPC::PRED_BIT_SET0 ) { |
1469 | 0 | MI.setDesc(get(isPPC64 ? (setLR ? 0 PPC::BCCTRL80 : PPC::BCCTR80 ) |
1470 | 0 | : (setLR ? 0 PPC::BCCTRL0 : PPC::BCCTR0 ))); |
1471 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1472 | 0 | .addReg(Pred[1].getReg()); |
1473 | 0 | return true; |
1474 | 0 | } else if (0 Pred[0].getImm() == PPC::PRED_BIT_UNSET0 ) { |
1475 | 0 | MI.setDesc(get(isPPC64 ? (setLR ? 0 PPC::BCCTRL8n0 : PPC::BCCTR8n0 ) |
1476 | 0 | : (setLR ? 0 PPC::BCCTRLn0 : PPC::BCCTRn0 ))); |
1477 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1478 | 0 | .addReg(Pred[1].getReg()); |
1479 | 0 | return true; |
1480 | 0 | } |
1481 | 0 |
|
1482 | 0 | MI.setDesc(get(isPPC64 ? 0 (setLR ? 0 PPC::BCCCTRL80 : PPC::BCCCTR80 ) |
1483 | 0 | : (setLR ? 0 PPC::BCCCTRL0 : PPC::BCCCTR0 ))); |
1484 | 0 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1485 | 0 | .addImm(Pred[0].getImm()) |
1486 | 0 | .addReg(Pred[1].getReg()); |
1487 | 0 | return true; |
1488 | 0 | } |
1489 | 0 |
|
1490 | 0 | return false; |
1491 | 0 | } |
1492 | | |
1493 | | bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
1494 | 2 | ArrayRef<MachineOperand> Pred2) const { |
1495 | 2 | assert(Pred1.size() == 2 && "Invalid PPC first predicate"); |
1496 | 2 | assert(Pred2.size() == 2 && "Invalid PPC second predicate"); |
1497 | 2 | |
1498 | 2 | if (Pred1[1].getReg() == PPC::CTR8 || 2 Pred1[1].getReg() == PPC::CTR2 ) |
1499 | 0 | return false; |
1500 | 2 | if (2 Pred2[1].getReg() == PPC::CTR8 || 2 Pred2[1].getReg() == PPC::CTR2 ) |
1501 | 0 | return false; |
1502 | 2 | |
1503 | 2 | // P1 can only subsume P2 if they test the same condition register. |
1504 | 2 | if (2 Pred1[1].getReg() != Pred2[1].getReg()2 ) |
1505 | 1 | return false; |
1506 | 1 | |
1507 | 1 | PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); |
1508 | 1 | PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); |
1509 | 1 | |
1510 | 1 | if (P1 == P2) |
1511 | 1 | return true; |
1512 | 0 |
|
1513 | 0 | // Does P1 subsume P2, e.g. GE subsumes GT. |
1514 | 0 | if (0 P1 == PPC::PRED_LE && |
1515 | 0 | (P2 == PPC::PRED_LT || 0 P2 == PPC::PRED_EQ0 )) |
1516 | 0 | return true; |
1517 | 0 | if (0 P1 == PPC::PRED_GE && |
1518 | 0 | (P2 == PPC::PRED_GT || 0 P2 == PPC::PRED_EQ0 )) |
1519 | 0 | return true; |
1520 | 0 |
|
1521 | 0 | return false; |
1522 | 0 | } |
1523 | | |
1524 | | bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, |
1525 | 10.5k | std::vector<MachineOperand> &Pred) const { |
1526 | 10.5k | // Note: At the present time, the contents of Pred from this function is |
1527 | 10.5k | // unused by IfConversion. This implementation follows ARM by pushing the |
1528 | 10.5k | // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of |
1529 | 10.5k | // predicate, instructions defining CTR or CTR8 are also included as |
1530 | 10.5k | // predicate-defining instructions. |
1531 | 10.5k | |
1532 | 10.5k | const TargetRegisterClass *RCs[] = |
1533 | 10.5k | { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, |
1534 | 10.5k | &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; |
1535 | 10.5k | |
1536 | 10.5k | bool Found = false; |
1537 | 42.4k | for (unsigned i = 0, e = MI.getNumOperands(); i != e42.4k ; ++i31.9k ) { |
1538 | 31.9k | const MachineOperand &MO = MI.getOperand(i); |
1539 | 147k | for (unsigned c = 0; c < array_lengthof(RCs) && 147k !Found118k ; ++c115k ) { |
1540 | 115k | const TargetRegisterClass *RC = RCs[c]; |
1541 | 115k | if (MO.isReg()115k ) { |
1542 | 91.5k | if (MO.isDef() && 91.5k RC->contains(MO.getReg())36.3k ) { |
1543 | 870 | Pred.push_back(MO); |
1544 | 870 | Found = true; |
1545 | 870 | } |
1546 | 115k | } else if (23.5k MO.isRegMask()23.5k ) { |
1547 | 321 | for (TargetRegisterClass::iterator I = RC->begin(), |
1548 | 2.88k | IE = RC->end(); I != IE2.88k ; ++I2.56k ) |
1549 | 2.56k | if (2.56k MO.clobbersPhysReg(*I)2.56k ) { |
1550 | 1.60k | Pred.push_back(MO); |
1551 | 1.60k | Found = true; |
1552 | 1.60k | } |
1553 | 23.5k | } |
1554 | 115k | } |
1555 | 31.9k | } |
1556 | 10.5k | |
1557 | 10.5k | return Found; |
1558 | 10.5k | } |
1559 | | |
1560 | 10.5k | bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const { |
1561 | 10.5k | unsigned OpC = MI.getOpcode(); |
1562 | 10.5k | switch (OpC) { |
1563 | 10.0k | default: |
1564 | 10.0k | return false; |
1565 | 506 | case PPC::B: |
1566 | 506 | case PPC::BLR: |
1567 | 506 | case PPC::BLR8: |
1568 | 506 | case PPC::BCTR: |
1569 | 506 | case PPC::BCTR8: |
1570 | 506 | case PPC::BCTRL: |
1571 | 506 | case PPC::BCTRL8: |
1572 | 506 | return true; |
1573 | 0 | } |
1574 | 0 | } |
1575 | | |
1576 | | bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, |
1577 | | unsigned &SrcReg2, int &Mask, |
1578 | 1.94k | int &Value) const { |
1579 | 1.94k | unsigned Opc = MI.getOpcode(); |
1580 | 1.94k | |
1581 | 1.94k | switch (Opc) { |
1582 | 0 | default: return false; |
1583 | 698 | case PPC::CMPWI: |
1584 | 698 | case PPC::CMPLWI: |
1585 | 698 | case PPC::CMPDI: |
1586 | 698 | case PPC::CMPLDI: |
1587 | 698 | SrcReg = MI.getOperand(1).getReg(); |
1588 | 698 | SrcReg2 = 0; |
1589 | 698 | Value = MI.getOperand(2).getImm(); |
1590 | 698 | Mask = 0xFFFF; |
1591 | 698 | return true; |
1592 | 1.24k | case PPC::CMPW: |
1593 | 1.24k | case PPC::CMPLW: |
1594 | 1.24k | case PPC::CMPD: |
1595 | 1.24k | case PPC::CMPLD: |
1596 | 1.24k | case PPC::FCMPUS: |
1597 | 1.24k | case PPC::FCMPUD: |
1598 | 1.24k | SrcReg = MI.getOperand(1).getReg(); |
1599 | 1.24k | SrcReg2 = MI.getOperand(2).getReg(); |
1600 | 1.24k | Value = 0; |
1601 | 1.24k | Mask = 0; |
1602 | 1.24k | return true; |
1603 | 0 | } |
1604 | 0 | } |
1605 | | |
1606 | | bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, |
1607 | | unsigned SrcReg2, int Mask, int Value, |
1608 | 1.94k | const MachineRegisterInfo *MRI) const { |
1609 | 1.94k | if (DisableCmpOpt) |
1610 | 0 | return false; |
1611 | 1.94k | |
1612 | 1.94k | int OpC = CmpInstr.getOpcode(); |
1613 | 1.94k | unsigned CRReg = CmpInstr.getOperand(0).getReg(); |
1614 | 1.94k | |
1615 | 1.94k | // FP record forms set CR1 based on the execption status bits, not a |
1616 | 1.94k | // comparison with zero. |
1617 | 1.94k | if (OpC == PPC::FCMPUS || 1.94k OpC == PPC::FCMPUD1.64k ) |
1618 | 400 | return false; |
1619 | 1.54k | |
1620 | 1.54k | // The record forms set the condition register based on a signed comparison |
1621 | 1.54k | // with zero (so says the ISA manual). This is not as straightforward as it |
1622 | 1.54k | // seems, however, because this is always a 64-bit comparison on PPC64, even |
1623 | 1.54k | // for instructions that are 32-bit in nature (like slw for example). |
1624 | 1.54k | // So, on PPC32, for unsigned comparisons, we can use the record forms only |
1625 | 1.54k | // for equality checks (as those don't depend on the sign). On PPC64, |
1626 | 1.54k | // we are restricted to equality for unsigned 64-bit comparisons and for |
1627 | 1.54k | // signed 32-bit comparisons the applicability is more restricted. |
1628 | 1.54k | bool isPPC64 = Subtarget.isPPC64(); |
1629 | 1.32k | bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; |
1630 | 1.21k | bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; |
1631 | 1.46k | bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; |
1632 | 1.54k | |
1633 | 1.54k | // Get the unique definition of SrcReg. |
1634 | 1.54k | MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); |
1635 | 1.54k | if (!MI1.54k ) return false0 ; |
1636 | 1.54k | int MIOpC = MI->getOpcode(); |
1637 | 1.54k | |
1638 | 1.54k | bool equalityOnly = false; |
1639 | 1.54k | bool noSub = false; |
1640 | 1.54k | if (isPPC641.54k ) { |
1641 | 1.42k | if (is32BitSignedCompare1.42k ) { |
1642 | 599 | // We can perform this optimization only if MI is sign-extending. |
1643 | 599 | if (MIOpC == PPC::SRAW || 599 MIOpC == PPC::SRAWo599 || |
1644 | 599 | MIOpC == PPC::SRAWI599 || MIOpC == PPC::SRAWIo599 || |
1645 | 599 | MIOpC == PPC::EXTSB599 || MIOpC == PPC::EXTSBo598 || |
1646 | 599 | MIOpC == PPC::EXTSH598 || MIOpC == PPC::EXTSHo598 || |
1647 | 599 | MIOpC == PPC::EXTSW598 || MIOpC == PPC::EXTSWo598 ) { |
1648 | 1 | noSub = true; |
1649 | 1 | } else |
1650 | 598 | return false; |
1651 | 829 | } else if (829 is32BitUnsignedCompare829 ) { |
1652 | 422 | // 32-bit rotate and mask instructions are zero extending only if MB <= ME |
1653 | 422 | bool isZeroExtendingRotate = |
1654 | 340 | (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINMo || |
1655 | 422 | MIOpC == PPC::RLWNM340 || MIOpC == PPC::RLWNMo339 ) |
1656 | 83 | && MI->getOperand(3).getImm() <= MI->getOperand(4).getImm(); |
1657 | 422 | |
1658 | 422 | // We can perform this optimization, equality only, if MI is |
1659 | 422 | // zero-extending. |
1660 | 422 | // FIXME: Other possible target instructions include ANDISo and |
1661 | 422 | // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI. |
1662 | 422 | if (MIOpC == PPC::CNTLZW || 422 MIOpC == PPC::CNTLZWo422 || |
1663 | 422 | MIOpC == PPC::SLW422 || MIOpC == PPC::SLWo418 || |
1664 | 422 | MIOpC == PPC::SRW418 || MIOpC == PPC::SRWo418 || |
1665 | 418 | MIOpC == PPC::ANDIo || |
1666 | 422 | isZeroExtendingRotate417 ) { |
1667 | 87 | noSub = true; |
1668 | 87 | equalityOnly = true; |
1669 | 87 | } else |
1670 | 335 | return false; |
1671 | 829 | } else |
1672 | 407 | equalityOnly = is64BitUnsignedCompare; |
1673 | 1.42k | } else |
1674 | 117 | equalityOnly = is32BitUnsignedCompare; |
1675 | 1.54k | |
1676 | 612 | if (612 equalityOnly612 ) { |
1677 | 325 | // We need to check the uses of the condition register in order to reject |
1678 | 325 | // non-equality comparisons. |
1679 | 325 | for (MachineRegisterInfo::use_instr_iterator |
1680 | 325 | I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); |
1681 | 567 | I != IE567 ; ++I242 ) { |
1682 | 330 | MachineInstr *UseMI = &*I; |
1683 | 330 | if (UseMI->getOpcode() == PPC::BCC330 ) { |
1684 | 280 | PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); |
1685 | 280 | unsigned PredCond = PPC::getPredicateCondition(Pred); |
1686 | 280 | // We ignore hint bits when checking for non-equality comparisons. |
1687 | 280 | if (PredCond != PPC::PRED_EQ && 280 PredCond != PPC::PRED_NE148 ) |
1688 | 48 | return false; |
1689 | 50 | } else if (50 UseMI->getOpcode() == PPC::ISEL || |
1690 | 50 | UseMI->getOpcode() == PPC::ISEL835 ) { |
1691 | 34 | unsigned SubIdx = UseMI->getOperand(3).getSubReg(); |
1692 | 34 | if (SubIdx != PPC::sub_eq) |
1693 | 24 | return false; |
1694 | 50 | } else |
1695 | 16 | return false; |
1696 | 330 | } |
1697 | 325 | } |
1698 | 612 | |
1699 | 524 | MachineBasicBlock::iterator I = CmpInstr; |
1700 | 524 | |
1701 | 524 | // Scan forward to find the first use of the compare. |
1702 | 1.11k | for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; |
1703 | 592 | ++I592 ) { |
1704 | 1.11k | bool FoundUse = false; |
1705 | 1.11k | for (MachineRegisterInfo::use_instr_iterator |
1706 | 1.11k | J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); |
1707 | 1.79k | J != JE1.79k ; ++J676 ) |
1708 | 1.19k | if (1.19k &*J == &*I1.19k ) { |
1709 | 522 | FoundUse = true; |
1710 | 522 | break; |
1711 | 522 | } |
1712 | 1.11k | |
1713 | 1.11k | if (FoundUse) |
1714 | 522 | break; |
1715 | 1.11k | } |
1716 | 524 | |
1717 | 524 | SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; |
1718 | 524 | SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; |
1719 | 524 | |
1720 | 524 | // There are two possible candidates which can be changed to set CR[01]. |
1721 | 524 | // One is MI, the other is a SUB instruction. |
1722 | 524 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). |
1723 | 524 | MachineInstr *Sub = nullptr; |
1724 | 524 | if (SrcReg2 != 0) |
1725 | 524 | // MI is not a candidate for CMPrr. |
1726 | 225 | MI = nullptr; |
1727 | 524 | // FIXME: Conservatively refuse to convert an instruction which isn't in the |
1728 | 524 | // same BB as the comparison. This is to allow the check below to avoid calls |
1729 | 524 | // (and other explicit clobbers); instead we should really check for these |
1730 | 524 | // more explicitly (in at least a few predecessors). |
1731 | 299 | else if (299 MI->getParent() != CmpInstr.getParent()299 ) |
1732 | 30 | return false; |
1733 | 269 | else if (269 Value != 0269 ) { |
1734 | 79 | // The record-form instructions set CR bit based on signed comparison against 0. |
1735 | 79 | // We try to convert a compare against 1 or -1 into a compare against 0. |
1736 | 79 | bool Success = false; |
1737 | 79 | if (!equalityOnly && 79 MRI->hasOneUse(CRReg)36 ) { |
1738 | 35 | MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg); |
1739 | 35 | if (UseMI->getOpcode() == PPC::BCC35 ) { |
1740 | 21 | PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); |
1741 | 21 | unsigned PredCond = PPC::getPredicateCondition(Pred); |
1742 | 21 | unsigned PredHint = PPC::getPredicateHint(Pred); |
1743 | 21 | int16_t Immed = (int16_t)Value; |
1744 | 21 | |
1745 | 21 | // When modyfing the condition in the predicate, we propagate hint bits |
1746 | 21 | // from the original predicate to the new one. |
1747 | 21 | if (Immed == -1 && 21 PredCond == PPC::PRED_GT2 ) { |
1748 | 0 | // We convert "greater than -1" into "greater than or equal to 0", |
1749 | 0 | // since we are assuming signed comparison by !equalityOnly |
1750 | 0 | PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), |
1751 | 0 | PPC::getPredicate(PPC::PRED_GE, PredHint))); |
1752 | 0 | Success = true; |
1753 | 0 | } |
1754 | 21 | else if (21 Immed == 1 && 21 PredCond == PPC::PRED_LT3 ) { |
1755 | 3 | // We convert "less than 1" into "less than or equal to 0". |
1756 | 3 | PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), |
1757 | 3 | PPC::getPredicate(PPC::PRED_LE, PredHint))); |
1758 | 3 | Success = true; |
1759 | 3 | } |
1760 | 21 | } |
1761 | 35 | } |
1762 | 79 | |
1763 | 79 | // PPC does not have a record-form SUBri. |
1764 | 79 | if (!Success) |
1765 | 76 | return false; |
1766 | 418 | } |
1767 | 418 | |
1768 | 418 | // Search for Sub. |
1769 | 418 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1770 | 418 | --I; |
1771 | 418 | |
1772 | 418 | // Get ready to iterate backward from CmpInstr. |
1773 | 418 | MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin(); |
1774 | 418 | |
1775 | 1.51k | for (; I != E && 1.51k !noSub1.38k ; --I1.10k ) { |
1776 | 1.33k | const MachineInstr &Instr = *I; |
1777 | 1.33k | unsigned IOpC = Instr.getOpcode(); |
1778 | 1.33k | |
1779 | 1.33k | if (&*I != &CmpInstr && 1.33k (Instr.modifiesRegister(PPC::CR0, TRI) || |
1780 | 949 | Instr.readsRegister(PPC::CR0, TRI))) |
1781 | 1.33k | // This instruction modifies or uses the record condition register after |
1782 | 1.33k | // the one we want to change. While we could do this transformation, it |
1783 | 1.33k | // would likely not be profitable. This transformation removes one |
1784 | 1.33k | // instruction, and so even forcing RA to generate one move probably |
1785 | 1.33k | // makes it unprofitable. |
1786 | 18 | return false; |
1787 | 1.31k | |
1788 | 1.31k | // Check whether CmpInstr can be made redundant by the current instruction. |
1789 | 1.31k | if (1.31k (OpC == PPC::CMPW || 1.31k OpC == PPC::CMPLW1.26k || |
1790 | 1.31k | OpC == PPC::CMPD1.25k || OpC == PPC::CMPLD519 ) && |
1791 | 895 | (IOpC == PPC::SUBF || 895 IOpC == PPC::SUBF8893 ) && |
1792 | 7 | ((Instr.getOperand(1).getReg() == SrcReg && |
1793 | 2 | Instr.getOperand(2).getReg() == SrcReg2) || |
1794 | 5 | (Instr.getOperand(1).getReg() == SrcReg2 && |
1795 | 1.31k | Instr.getOperand(2).getReg() == SrcReg4 ))) { |
1796 | 5 | Sub = &*I; |
1797 | 5 | break; |
1798 | 5 | } |
1799 | 1.30k | |
1800 | 1.30k | if (1.30k I == B1.30k ) |
1801 | 1.30k | // The 'and' is below the comparison instruction. |
1802 | 208 | return false; |
1803 | 1.33k | } |
1804 | 418 | |
1805 | 418 | // Return false if no candidates exist. |
1806 | 192 | if (192 !MI && 192 !Sub5 ) |
1807 | 0 | return false; |
1808 | 192 | |
1809 | 192 | // The single candidate is called MI. |
1810 | 192 | if (192 !MI192 ) MI = Sub5 ; |
1811 | 192 | |
1812 | 192 | int NewOpC = -1; |
1813 | 192 | MIOpC = MI->getOpcode(); |
1814 | 192 | if (MIOpC == PPC::ANDIo || 192 MIOpC == PPC::ANDIo8191 ) |
1815 | 2 | NewOpC = MIOpC; |
1816 | 190 | else { |
1817 | 190 | NewOpC = PPC::getRecordFormOpcode(MIOpC); |
1818 | 190 | if (NewOpC == -1 && 190 PPC::getNonRecordFormOpcode(MIOpC) != -1106 ) |
1819 | 0 | NewOpC = MIOpC; |
1820 | 190 | } |
1821 | 192 | |
1822 | 192 | // FIXME: On the non-embedded POWER architectures, only some of the record |
1823 | 192 | // forms are fast, and we should use only the fast ones. |
1824 | 192 | |
1825 | 192 | // The defining instruction has a record form (or is already a record |
1826 | 192 | // form). It is possible, however, that we'll need to reverse the condition |
1827 | 192 | // code of the users. |
1828 | 192 | if (NewOpC == -1) |
1829 | 106 | return false; |
1830 | 86 | |
1831 | 86 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP |
1832 | 86 | // needs to be updated to be based on SUB. Push the condition code |
1833 | 86 | // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the |
1834 | 86 | // condition code of these operands will be modified. |
1835 | 86 | // Here, Value == 0 means we haven't converted comparison against 1 or -1 to |
1836 | 86 | // comparison against 0, which may modify predicate. |
1837 | 86 | bool ShouldSwap = false; |
1838 | 86 | if (Sub && 86 Value == 05 ) { |
1839 | 5 | ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && |
1840 | 3 | Sub->getOperand(2).getReg() == SrcReg; |
1841 | 5 | |
1842 | 5 | // The operands to subf are the opposite of sub, so only in the fixed-point |
1843 | 5 | // case, invert the order. |
1844 | 5 | ShouldSwap = !ShouldSwap; |
1845 | 5 | } |
1846 | 86 | |
1847 | 86 | if (ShouldSwap) |
1848 | 2 | for (MachineRegisterInfo::use_instr_iterator |
1849 | 2 | I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); |
1850 | 4 | I != IE4 ; ++I2 ) { |
1851 | 2 | MachineInstr *UseMI = &*I; |
1852 | 2 | if (UseMI->getOpcode() == PPC::BCC2 ) { |
1853 | 0 | PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); |
1854 | 0 | unsigned PredCond = PPC::getPredicateCondition(Pred); |
1855 | 0 | assert((!equalityOnly || |
1856 | 0 | PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) && |
1857 | 0 | "Invalid predicate for equality-only optimization"); |
1858 | 0 | (void)PredCond; // To suppress warning in release build. |
1859 | 0 | PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), |
1860 | 0 | PPC::getSwappedPredicate(Pred))); |
1861 | 2 | } else if (2 UseMI->getOpcode() == PPC::ISEL || |
1862 | 2 | UseMI->getOpcode() == PPC::ISEL82 ) { |
1863 | 2 | unsigned NewSubReg = UseMI->getOperand(3).getSubReg(); |
1864 | 2 | assert((!equalityOnly || NewSubReg == PPC::sub_eq) && |
1865 | 2 | "Invalid CR bit for equality-only optimization"); |
1866 | 2 | |
1867 | 2 | if (NewSubReg == PPC::sub_lt) |
1868 | 1 | NewSubReg = PPC::sub_gt; |
1869 | 1 | else if (1 NewSubReg == PPC::sub_gt1 ) |
1870 | 1 | NewSubReg = PPC::sub_lt; |
1871 | 2 | |
1872 | 2 | SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)), |
1873 | 2 | NewSubReg)); |
1874 | 2 | } else // We need to abort on a user we don't understand. |
1875 | 0 | return false; |
1876 | 2 | } |
1877 | 86 | assert(!(Value != 0 && ShouldSwap) && |
1878 | 86 | "Non-zero immediate support and ShouldSwap" |
1879 | 86 | "may conflict in updating predicate"); |
1880 | 86 | |
1881 | 86 | // Create a new virtual register to hold the value of the CR set by the |
1882 | 86 | // record-form instruction. If the instruction was not previously in |
1883 | 86 | // record form, then set the kill flag on the CR. |
1884 | 86 | CmpInstr.eraseFromParent(); |
1885 | 86 | |
1886 | 86 | MachineBasicBlock::iterator MII = MI; |
1887 | 86 | BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(), |
1888 | 86 | get(TargetOpcode::COPY), CRReg) |
1889 | 86 | .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill84 : 02 ); |
1890 | 86 | |
1891 | 86 | // Even if CR0 register were dead before, it is alive now since the |
1892 | 86 | // instruction we just built uses it. |
1893 | 86 | MI->clearRegisterDeads(PPC::CR0); |
1894 | 86 | |
1895 | 86 | if (MIOpC != NewOpC86 ) { |
1896 | 84 | // We need to be careful here: we're replacing one instruction with |
1897 | 84 | // another, and we need to make sure that we get all of the right |
1898 | 84 | // implicit uses and defs. On the other hand, the caller may be holding |
1899 | 84 | // an iterator to this instruction, and so we can't delete it (this is |
1900 | 84 | // specifically the case if this is the instruction directly after the |
1901 | 84 | // compare). |
1902 | 84 | |
1903 | 84 | const MCInstrDesc &NewDesc = get(NewOpC); |
1904 | 84 | MI->setDesc(NewDesc); |
1905 | 84 | |
1906 | 84 | if (NewDesc.ImplicitDefs) |
1907 | 84 | for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); |
1908 | 170 | *ImpDefs170 ; ++ImpDefs86 ) |
1909 | 86 | if (86 !MI->definesRegister(*ImpDefs)86 ) |
1910 | 84 | MI->addOperand(*MI->getParent()->getParent(), |
1911 | 84 | MachineOperand::CreateReg(*ImpDefs, true, true)); |
1912 | 84 | if (NewDesc.ImplicitUses) |
1913 | 1 | for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); |
1914 | 2 | *ImpUses2 ; ++ImpUses1 ) |
1915 | 1 | if (1 !MI->readsRegister(*ImpUses)1 ) |
1916 | 0 | MI->addOperand(*MI->getParent()->getParent(), |
1917 | 0 | MachineOperand::CreateReg(*ImpUses, false, true)); |
1918 | 84 | } |
1919 | 86 | assert(MI->definesRegister(PPC::CR0) && |
1920 | 86 | "Record-form instruction does not define cr0?"); |
1921 | 86 | |
1922 | 86 | // Modify the condition code of operands in OperandsToUpdate. |
1923 | 86 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
1924 | 86 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
1925 | 86 | for (unsigned i = 0, e = PredsToUpdate.size(); i < e86 ; i++0 ) |
1926 | 0 | PredsToUpdate[i].first->setImm(PredsToUpdate[i].second); |
1927 | 86 | |
1928 | 88 | for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e88 ; i++2 ) |
1929 | 2 | SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second); |
1930 | 86 | |
1931 | 86 | return true; |
1932 | 1.94k | } |
1933 | | |
1934 | | /// GetInstSize - Return the number of bytes of code the specified |
1935 | | /// instruction may be. This returns the maximum number of bytes. |
1936 | | /// |
1937 | 73.7k | unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
1938 | 73.7k | unsigned Opcode = MI.getOpcode(); |
1939 | 73.7k | |
1940 | 73.7k | if (Opcode == PPC::INLINEASM73.7k ) { |
1941 | 280 | const MachineFunction *MF = MI.getParent()->getParent(); |
1942 | 280 | const char *AsmStr = MI.getOperand(0).getSymbolName(); |
1943 | 280 | return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); |
1944 | 73.4k | } else if (73.4k Opcode == TargetOpcode::STACKMAP73.4k ) { |
1945 | 17 | StackMapOpers Opers(&MI); |
1946 | 17 | return Opers.getNumPatchBytes(); |
1947 | 73.4k | } else if (73.4k Opcode == TargetOpcode::PATCHPOINT73.4k ) { |
1948 | 39 | PatchPointOpers Opers(&MI); |
1949 | 39 | return Opers.getNumPatchBytes(); |
1950 | 0 | } else { |
1951 | 73.4k | return get(Opcode).getSize(); |
1952 | 73.4k | } |
1953 | 0 | } |
1954 | | |
1955 | | std::pair<unsigned, unsigned> |
1956 | 1 | PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
1957 | 1 | const unsigned Mask = PPCII::MO_ACCESS_MASK; |
1958 | 1 | return std::make_pair(TF & Mask, TF & ~Mask); |
1959 | 1 | } |
1960 | | |
1961 | | ArrayRef<std::pair<unsigned, const char *>> |
1962 | 3 | PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
1963 | 3 | using namespace PPCII; |
1964 | 3 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
1965 | 3 | {MO_LO, "ppc-lo"}, |
1966 | 3 | {MO_HA, "ppc-ha"}, |
1967 | 3 | {MO_TPREL_LO, "ppc-tprel-lo"}, |
1968 | 3 | {MO_TPREL_HA, "ppc-tprel-ha"}, |
1969 | 3 | {MO_DTPREL_LO, "ppc-dtprel-lo"}, |
1970 | 3 | {MO_TLSLD_LO, "ppc-tlsld-lo"}, |
1971 | 3 | {MO_TOC_LO, "ppc-toc-lo"}, |
1972 | 3 | {MO_TLS, "ppc-tls"}}; |
1973 | 3 | return makeArrayRef(TargetFlags); |
1974 | 3 | } |
1975 | | |
1976 | | ArrayRef<std::pair<unsigned, const char *>> |
1977 | 0 | PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
1978 | 0 | using namespace PPCII; |
1979 | 0 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
1980 | 0 | {MO_PLT, "ppc-plt"}, |
1981 | 0 | {MO_PIC_FLAG, "ppc-pic"}, |
1982 | 0 | {MO_NLP_FLAG, "ppc-nlp"}, |
1983 | 0 | {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}}; |
1984 | 0 | return makeArrayRef(TargetFlags); |
1985 | 0 | } |
1986 | | |
1987 | 6.25k | bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1988 | 6.25k | auto &MBB = *MI.getParent(); |
1989 | 6.25k | auto DL = MI.getDebugLoc(); |
1990 | 6.25k | switch (MI.getOpcode()) { |
1991 | 4 | case TargetOpcode::LOAD_STACK_GUARD: { |
1992 | 4 | assert(Subtarget.isTargetLinux() && |
1993 | 4 | "Only Linux target is expected to contain LOAD_STACK_GUARD"); |
1994 | 4 | const int64_t Offset = Subtarget.isPPC64() ? -0x70102 : -0x70082 ; |
1995 | 4 | const unsigned Reg = Subtarget.isPPC64() ? PPC::X132 : PPC::R22 ; |
1996 | 4 | MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD2 : PPC::LWZ2 )); |
1997 | 4 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
1998 | 4 | .addImm(Offset) |
1999 | 4 | .addReg(Reg); |
2000 | 4 | return true; |
2001 | 6.25k | } |
2002 | 199 | case PPC::DFLOADf32: |
2003 | 199 | case PPC::DFLOADf64: |
2004 | 199 | case PPC::DFSTOREf32: |
2005 | 199 | case PPC::DFSTOREf64: { |
2006 | 199 | assert(Subtarget.hasP9Vector() && |
2007 | 199 | "Invalid D-Form Pseudo-ops on non-P9 target."); |
2008 | 199 | assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && |
2009 | 199 | "D-form op must have register and immediate operands"); |
2010 | 199 | unsigned UpperOpcode, LowerOpcode; |
2011 | 199 | switch (MI.getOpcode()) { |
2012 | 105 | case PPC::DFLOADf32: |
2013 | 105 | UpperOpcode = PPC::LXSSP; |
2014 | 105 | LowerOpcode = PPC::LFS; |
2015 | 105 | break; |
2016 | 70 | case PPC::DFLOADf64: |
2017 | 70 | UpperOpcode = PPC::LXSD; |
2018 | 70 | LowerOpcode = PPC::LFD; |
2019 | 70 | break; |
2020 | 11 | case PPC::DFSTOREf32: |
2021 | 11 | UpperOpcode = PPC::STXSSP; |
2022 | 11 | LowerOpcode = PPC::STFS; |
2023 | 11 | break; |
2024 | 13 | case PPC::DFSTOREf64: |
2025 | 13 | UpperOpcode = PPC::STXSD; |
2026 | 13 | LowerOpcode = PPC::STFD; |
2027 | 13 | break; |
2028 | 199 | } |
2029 | 199 | unsigned TargetReg = MI.getOperand(0).getReg(); |
2030 | 199 | unsigned Opcode; |
2031 | 199 | if ((TargetReg >= PPC::F0 && 199 TargetReg <= PPC::F31199 ) || |
2032 | 12 | (TargetReg >= PPC::VSL0 && 12 TargetReg <= PPC::VSL310 )) |
2033 | 187 | Opcode = LowerOpcode; |
2034 | 199 | else |
2035 | 12 | Opcode = UpperOpcode; |
2036 | 199 | MI.setDesc(get(Opcode)); |
2037 | 199 | return true; |
2038 | 199 | } |
2039 | 0 | case PPC::SPILLTOVSR_LD: { |
2040 | 0 | unsigned TargetReg = MI.getOperand(0).getReg(); |
2041 | 0 | if (PPC::VSFRCRegClass.contains(TargetReg)0 ) { |
2042 | 0 | MI.setDesc(get(PPC::DFLOADf64)); |
2043 | 0 | return expandPostRAPseudo(MI); |
2044 | 0 | } |
2045 | 0 | else |
2046 | 0 | MI.setDesc(get(PPC::LD)); |
2047 | 0 | return true; |
2048 | 0 | } |
2049 | 0 | case PPC::SPILLTOVSR_ST: { |
2050 | 0 | unsigned SrcReg = MI.getOperand(0).getReg(); |
2051 | 0 | if (PPC::VSFRCRegClass.contains(SrcReg)0 ) { |
2052 | 0 | NumStoreSPILLVSRRCAsVec++; |
2053 | 0 | MI.setDesc(get(PPC::DFSTOREf64)); |
2054 | 0 | return expandPostRAPseudo(MI); |
2055 | 0 | } else { |
2056 | 0 | NumStoreSPILLVSRRCAsGpr++; |
2057 | 0 | MI.setDesc(get(PPC::STD)); |
2058 | 0 | } |
2059 | 0 | return true; |
2060 | 0 | } |
2061 | 0 | case PPC::SPILLTOVSR_LDX: { |
2062 | 0 | unsigned TargetReg = MI.getOperand(0).getReg(); |
2063 | 0 | if (PPC::VSFRCRegClass.contains(TargetReg)) |
2064 | 0 | MI.setDesc(get(PPC::LXSDX)); |
2065 | 0 | else |
2066 | 0 | MI.setDesc(get(PPC::LDX)); |
2067 | 0 | return true; |
2068 | 0 | } |
2069 | 0 | case PPC::SPILLTOVSR_STX: { |
2070 | 0 | unsigned SrcReg = MI.getOperand(0).getReg(); |
2071 | 0 | if (PPC::VSFRCRegClass.contains(SrcReg)0 ) { |
2072 | 0 | NumStoreSPILLVSRRCAsVec++; |
2073 | 0 | MI.setDesc(get(PPC::STXSDX)); |
2074 | 0 | } else { |
2075 | 0 | NumStoreSPILLVSRRCAsGpr++; |
2076 | 0 | MI.setDesc(get(PPC::STDX)); |
2077 | 0 | } |
2078 | 0 | return true; |
2079 | 0 | } |
2080 | 0 |
|
2081 | 23 | case PPC::CFENCE8: { |
2082 | 23 | auto Val = MI.getOperand(0).getReg(); |
2083 | 23 | BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); |
2084 | 23 | BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) |
2085 | 23 | .addImm(PPC::PRED_NE_MINUS) |
2086 | 23 | .addReg(PPC::CR7) |
2087 | 23 | .addImm(1); |
2088 | 23 | MI.setDesc(get(PPC::ISYNC)); |
2089 | 23 | MI.RemoveOperand(0); |
2090 | 23 | return true; |
2091 | 6.02k | } |
2092 | 6.02k | } |
2093 | 6.02k | return false; |
2094 | 6.02k | } |
2095 | | |
2096 | | const TargetRegisterClass * |
2097 | 2.09k | PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { |
2098 | 2.09k | if (Subtarget.hasVSX() && 2.09k RC == &PPC::VRRCRegClass1.09k ) |
2099 | 145 | return &PPC::VSRCRegClass; |
2100 | 1.95k | return RC; |
2101 | 1.95k | } |
2102 | | |
2103 | 0 | int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { |
2104 | 0 | return PPC::getRecordFormOpcode(Opcode); |
2105 | 0 | } |