/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the PPC implementation of TargetFrameLowering class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "PPCFrameLowering.h" |
14 | | #include "PPCInstrBuilder.h" |
15 | | #include "PPCInstrInfo.h" |
16 | | #include "PPCMachineFunctionInfo.h" |
17 | | #include "PPCSubtarget.h" |
18 | | #include "PPCTargetMachine.h" |
19 | | #include "llvm/ADT/Statistic.h" |
20 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
21 | | #include "llvm/CodeGen/MachineFunction.h" |
22 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
23 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
24 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | | #include "llvm/CodeGen/RegisterScavenging.h" |
26 | | #include "llvm/IR/Function.h" |
27 | | #include "llvm/Target/TargetOptions.h" |
28 | | |
29 | | using namespace llvm; |
30 | | |
31 | | #define DEBUG_TYPE "framelowering" |
32 | | STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); |
33 | | STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); |
34 | | |
35 | | static cl::opt<bool> |
36 | | EnablePEVectorSpills("ppc-enable-pe-vector-spills", |
37 | | cl::desc("Enable spills in prologue to vector registers."), |
38 | | cl::init(false), cl::Hidden); |
39 | | |
40 | | /// VRRegNo - Map from a numbered VR register to its enum value. |
41 | | /// |
42 | | static const MCPhysReg VRRegNo[] = { |
43 | | PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , |
44 | | PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, |
45 | | PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, |
46 | | PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 |
47 | | }; |
48 | | |
49 | 1.85k | static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { |
50 | 1.85k | if (STI.isDarwinABI()) |
51 | 0 | return STI.isPPC64() ? 16 : 8; |
52 | 1.85k | // SVR4 ABI: |
53 | 1.85k | return STI.isPPC64() ? 161.46k : 4387 ; |
54 | 1.85k | } |
55 | | |
56 | 1.85k | static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { |
57 | 1.85k | return STI.isELFv2ABI() ? 24598 : 401.25k ; |
58 | 1.85k | } |
59 | | |
60 | 1.85k | static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { |
61 | 1.85k | // For the Darwin ABI: |
62 | 1.85k | // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area |
63 | 1.85k | // for saving the frame pointer (if needed.) While the published ABI has |
64 | 1.85k | // not used this slot since at least MacOSX 10.2, there is older code |
65 | 1.85k | // around that does use it, and that needs to continue to work. |
66 | 1.85k | if (STI.isDarwinABI()) |
67 | 0 | return STI.isPPC64() ? -8U : -4U; |
68 | 1.85k | |
69 | 1.85k | // SVR4 ABI: First slot in the general register save area. |
70 | 1.85k | return STI.isPPC64() ? -8U1.46k : -4U387 ; |
71 | 1.85k | } |
72 | | |
73 | 1.85k | static unsigned computeLinkageSize(const PPCSubtarget &STI) { |
74 | 1.85k | if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()1.84k ) |
75 | 1.46k | return (STI.isELFv2ABI() ? 4598 : 6869 ) * (STI.isPPC64() ? 81.46k : 42 ); |
76 | 385 | |
77 | 385 | // 32-bit SVR4 ABI: |
78 | 385 | return 8; |
79 | 385 | } |
80 | | |
81 | 1.85k | static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { |
82 | 1.85k | if (STI.isDarwinABI()) |
83 | 0 | return STI.isPPC64() ? -16U : -8U; |
84 | 1.85k | |
85 | 1.85k | // SVR4 ABI: First slot in the general register save area. |
86 | 1.85k | return STI.isPPC64() |
87 | 1.85k | ? -16U1.46k |
88 | 1.85k | : STI.getTargetMachine().isPositionIndependent() 387 ? -12U34 : -8U353 ; |
89 | 1.85k | } |
90 | | |
91 | | PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) |
92 | | : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, |
93 | | STI.getPlatformStackAlignment(), 0), |
94 | | Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), |
95 | | TOCSaveOffset(computeTOCSaveOffset(Subtarget)), |
96 | | FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), |
97 | | LinkageSize(computeLinkageSize(Subtarget)), |
98 | 1.85k | BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} |
99 | | |
100 | | // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. |
101 | | const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( |
102 | 434 | unsigned &NumEntries) const { |
103 | 434 | if (Subtarget.isDarwinABI()) { |
104 | 0 | NumEntries = 1; |
105 | 0 | if (Subtarget.isPPC64()) { |
106 | 0 | static const SpillSlot darwin64Offsets = {PPC::X31, -8}; |
107 | 0 | return &darwin64Offsets; |
108 | 0 | } else { |
109 | 0 | static const SpillSlot darwinOffsets = {PPC::R31, -4}; |
110 | 0 | return &darwinOffsets; |
111 | 0 | } |
112 | 434 | } |
113 | 434 | |
114 | 434 | // Early exit if not using the SVR4 ABI. |
115 | 434 | if (!Subtarget.isSVR4ABI()) { |
116 | 0 | NumEntries = 0; |
117 | 0 | return nullptr; |
118 | 0 | } |
119 | 434 | |
120 | 434 | // Note that the offsets here overlap, but this is fixed up in |
121 | 434 | // processFunctionBeforeFrameFinalized. |
122 | 434 | |
123 | 434 | static const SpillSlot Offsets[] = { |
124 | 434 | // Floating-point register save area offsets. |
125 | 434 | {PPC::F31, -8}, |
126 | 434 | {PPC::F30, -16}, |
127 | 434 | {PPC::F29, -24}, |
128 | 434 | {PPC::F28, -32}, |
129 | 434 | {PPC::F27, -40}, |
130 | 434 | {PPC::F26, -48}, |
131 | 434 | {PPC::F25, -56}, |
132 | 434 | {PPC::F24, -64}, |
133 | 434 | {PPC::F23, -72}, |
134 | 434 | {PPC::F22, -80}, |
135 | 434 | {PPC::F21, -88}, |
136 | 434 | {PPC::F20, -96}, |
137 | 434 | {PPC::F19, -104}, |
138 | 434 | {PPC::F18, -112}, |
139 | 434 | {PPC::F17, -120}, |
140 | 434 | {PPC::F16, -128}, |
141 | 434 | {PPC::F15, -136}, |
142 | 434 | {PPC::F14, -144}, |
143 | 434 | |
144 | 434 | // General register save area offsets. |
145 | 434 | {PPC::R31, -4}, |
146 | 434 | {PPC::R30, -8}, |
147 | 434 | {PPC::R29, -12}, |
148 | 434 | {PPC::R28, -16}, |
149 | 434 | {PPC::R27, -20}, |
150 | 434 | {PPC::R26, -24}, |
151 | 434 | {PPC::R25, -28}, |
152 | 434 | {PPC::R24, -32}, |
153 | 434 | {PPC::R23, -36}, |
154 | 434 | {PPC::R22, -40}, |
155 | 434 | {PPC::R21, -44}, |
156 | 434 | {PPC::R20, -48}, |
157 | 434 | {PPC::R19, -52}, |
158 | 434 | {PPC::R18, -56}, |
159 | 434 | {PPC::R17, -60}, |
160 | 434 | {PPC::R16, -64}, |
161 | 434 | {PPC::R15, -68}, |
162 | 434 | {PPC::R14, -72}, |
163 | 434 | |
164 | 434 | // CR save area offset. We map each of the nonvolatile CR fields |
165 | 434 | // to the slot for CR2, which is the first of the nonvolatile CR |
166 | 434 | // fields to be assigned, so that we only allocate one save slot. |
167 | 434 | // See PPCRegisterInfo::hasReservedSpillSlot() for more information. |
168 | 434 | {PPC::CR2, -4}, |
169 | 434 | |
170 | 434 | // VRSAVE save area offset. |
171 | 434 | {PPC::VRSAVE, -4}, |
172 | 434 | |
173 | 434 | // Vector register save area |
174 | 434 | {PPC::V31, -16}, |
175 | 434 | {PPC::V30, -32}, |
176 | 434 | {PPC::V29, -48}, |
177 | 434 | {PPC::V28, -64}, |
178 | 434 | {PPC::V27, -80}, |
179 | 434 | {PPC::V26, -96}, |
180 | 434 | {PPC::V25, -112}, |
181 | 434 | {PPC::V24, -128}, |
182 | 434 | {PPC::V23, -144}, |
183 | 434 | {PPC::V22, -160}, |
184 | 434 | {PPC::V21, -176}, |
185 | 434 | {PPC::V20, -192}, |
186 | 434 | |
187 | 434 | // SPE register save area (overlaps Vector save area). |
188 | 434 | {PPC::S31, -8}, |
189 | 434 | {PPC::S30, -16}, |
190 | 434 | {PPC::S29, -24}, |
191 | 434 | {PPC::S28, -32}, |
192 | 434 | {PPC::S27, -40}, |
193 | 434 | {PPC::S26, -48}, |
194 | 434 | {PPC::S25, -56}, |
195 | 434 | {PPC::S24, -64}, |
196 | 434 | {PPC::S23, -72}, |
197 | 434 | {PPC::S22, -80}, |
198 | 434 | {PPC::S21, -88}, |
199 | 434 | {PPC::S20, -96}, |
200 | 434 | {PPC::S19, -104}, |
201 | 434 | {PPC::S18, -112}, |
202 | 434 | {PPC::S17, -120}, |
203 | 434 | {PPC::S16, -128}, |
204 | 434 | {PPC::S15, -136}, |
205 | 434 | {PPC::S14, -144}}; |
206 | 434 | |
207 | 434 | static const SpillSlot Offsets64[] = { |
208 | 434 | // Floating-point register save area offsets. |
209 | 434 | {PPC::F31, -8}, |
210 | 434 | {PPC::F30, -16}, |
211 | 434 | {PPC::F29, -24}, |
212 | 434 | {PPC::F28, -32}, |
213 | 434 | {PPC::F27, -40}, |
214 | 434 | {PPC::F26, -48}, |
215 | 434 | {PPC::F25, -56}, |
216 | 434 | {PPC::F24, -64}, |
217 | 434 | {PPC::F23, -72}, |
218 | 434 | {PPC::F22, -80}, |
219 | 434 | {PPC::F21, -88}, |
220 | 434 | {PPC::F20, -96}, |
221 | 434 | {PPC::F19, -104}, |
222 | 434 | {PPC::F18, -112}, |
223 | 434 | {PPC::F17, -120}, |
224 | 434 | {PPC::F16, -128}, |
225 | 434 | {PPC::F15, -136}, |
226 | 434 | {PPC::F14, -144}, |
227 | 434 | |
228 | 434 | // General register save area offsets. |
229 | 434 | {PPC::X31, -8}, |
230 | 434 | {PPC::X30, -16}, |
231 | 434 | {PPC::X29, -24}, |
232 | 434 | {PPC::X28, -32}, |
233 | 434 | {PPC::X27, -40}, |
234 | 434 | {PPC::X26, -48}, |
235 | 434 | {PPC::X25, -56}, |
236 | 434 | {PPC::X24, -64}, |
237 | 434 | {PPC::X23, -72}, |
238 | 434 | {PPC::X22, -80}, |
239 | 434 | {PPC::X21, -88}, |
240 | 434 | {PPC::X20, -96}, |
241 | 434 | {PPC::X19, -104}, |
242 | 434 | {PPC::X18, -112}, |
243 | 434 | {PPC::X17, -120}, |
244 | 434 | {PPC::X16, -128}, |
245 | 434 | {PPC::X15, -136}, |
246 | 434 | {PPC::X14, -144}, |
247 | 434 | |
248 | 434 | // VRSAVE save area offset. |
249 | 434 | {PPC::VRSAVE, -4}, |
250 | 434 | |
251 | 434 | // Vector register save area |
252 | 434 | {PPC::V31, -16}, |
253 | 434 | {PPC::V30, -32}, |
254 | 434 | {PPC::V29, -48}, |
255 | 434 | {PPC::V28, -64}, |
256 | 434 | {PPC::V27, -80}, |
257 | 434 | {PPC::V26, -96}, |
258 | 434 | {PPC::V25, -112}, |
259 | 434 | {PPC::V24, -128}, |
260 | 434 | {PPC::V23, -144}, |
261 | 434 | {PPC::V22, -160}, |
262 | 434 | {PPC::V21, -176}, |
263 | 434 | {PPC::V20, -192}}; |
264 | 434 | |
265 | 434 | if (Subtarget.isPPC64()) { |
266 | 363 | NumEntries = array_lengthof(Offsets64); |
267 | 363 | |
268 | 363 | return Offsets64; |
269 | 363 | } else { |
270 | 71 | NumEntries = array_lengthof(Offsets); |
271 | 71 | |
272 | 71 | return Offsets; |
273 | 71 | } |
274 | 434 | } |
275 | | |
276 | | /// RemoveVRSaveCode - We have found that this function does not need any code |
277 | | /// to manipulate the VRSAVE register, even though it uses vector registers. |
278 | | /// This can happen when the only registers used are known to be live in or out |
279 | | /// of the function. Remove all of the VRSAVE related code from the function. |
280 | | /// FIXME: The removal of the code results in a compile failure at -O0 when the |
281 | | /// function contains a function call, as the GPR containing original VRSAVE |
282 | | /// contents is spilled and reloaded around the call. Without the prolog code, |
283 | | /// the spill instruction refers to an undefined register. This code needs |
284 | | /// to account for all uses of that GPR. |
285 | 0 | static void RemoveVRSaveCode(MachineInstr &MI) { |
286 | 0 | MachineBasicBlock *Entry = MI.getParent(); |
287 | 0 | MachineFunction *MF = Entry->getParent(); |
288 | 0 |
|
289 | 0 | // We know that the MTVRSAVE instruction immediately follows MI. Remove it. |
290 | 0 | MachineBasicBlock::iterator MBBI = MI; |
291 | 0 | ++MBBI; |
292 | 0 | assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); |
293 | 0 | MBBI->eraseFromParent(); |
294 | 0 |
|
295 | 0 | bool RemovedAllMTVRSAVEs = true; |
296 | 0 | // See if we can find and remove the MTVRSAVE instruction from all of the |
297 | 0 | // epilog blocks. |
298 | 0 | for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { |
299 | 0 | // If last instruction is a return instruction, add an epilogue |
300 | 0 | if (I->isReturnBlock()) { |
301 | 0 | bool FoundIt = false; |
302 | 0 | for (MBBI = I->end(); MBBI != I->begin(); ) { |
303 | 0 | --MBBI; |
304 | 0 | if (MBBI->getOpcode() == PPC::MTVRSAVE) { |
305 | 0 | MBBI->eraseFromParent(); // remove it. |
306 | 0 | FoundIt = true; |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | } |
310 | 0 | RemovedAllMTVRSAVEs &= FoundIt; |
311 | 0 | } |
312 | 0 | } |
313 | 0 |
|
314 | 0 | // If we found and removed all MTVRSAVE instructions, remove the read of |
315 | 0 | // VRSAVE as well. |
316 | 0 | if (RemovedAllMTVRSAVEs) { |
317 | 0 | MBBI = MI; |
318 | 0 | assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); |
319 | 0 | --MBBI; |
320 | 0 | assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); |
321 | 0 | MBBI->eraseFromParent(); |
322 | 0 | } |
323 | 0 |
|
324 | 0 | // Finally, nuke the UPDATE_VRSAVE. |
325 | 0 | MI.eraseFromParent(); |
326 | 0 | } |
327 | | |
328 | | // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the |
329 | | // instruction selector. Based on the vector registers that have been used, |
330 | | // transform this into the appropriate ORI instruction. |
331 | 0 | static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { |
332 | 0 | MachineFunction *MF = MI.getParent()->getParent(); |
333 | 0 | const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); |
334 | 0 | DebugLoc dl = MI.getDebugLoc(); |
335 | 0 |
|
336 | 0 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
337 | 0 | unsigned UsedRegMask = 0; |
338 | 0 | for (unsigned i = 0; i != 32; ++i) |
339 | 0 | if (MRI.isPhysRegModified(VRRegNo[i])) |
340 | 0 | UsedRegMask |= 1 << (31-i); |
341 | 0 |
|
342 | 0 | // Live in and live out values already must be in the mask, so don't bother |
343 | 0 | // marking them. |
344 | 0 | for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { |
345 | 0 | unsigned RegNo = TRI->getEncodingValue(LI.first); |
346 | 0 | if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. |
347 | 0 | UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. |
348 | 0 | } |
349 | 0 |
|
350 | 0 | // Live out registers appear as use operands on return instructions. |
351 | 0 | for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); |
352 | 0 | UsedRegMask != 0 && BI != BE; ++BI) { |
353 | 0 | const MachineBasicBlock &MBB = *BI; |
354 | 0 | if (!MBB.isReturnBlock()) |
355 | 0 | continue; |
356 | 0 | const MachineInstr &Ret = MBB.back(); |
357 | 0 | for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { |
358 | 0 | const MachineOperand &MO = Ret.getOperand(I); |
359 | 0 | if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) |
360 | 0 | continue; |
361 | 0 | unsigned RegNo = TRI->getEncodingValue(MO.getReg()); |
362 | 0 | UsedRegMask &= ~(1 << (31-RegNo)); |
363 | 0 | } |
364 | 0 | } |
365 | 0 |
|
366 | 0 | // If no registers are used, turn this into a copy. |
367 | 0 | if (UsedRegMask == 0) { |
368 | 0 | // Remove all VRSAVE code. |
369 | 0 | RemoveVRSaveCode(MI); |
370 | 0 | return; |
371 | 0 | } |
372 | 0 | |
373 | 0 | unsigned SrcReg = MI.getOperand(1).getReg(); |
374 | 0 | unsigned DstReg = MI.getOperand(0).getReg(); |
375 | 0 |
|
376 | 0 | if ((UsedRegMask & 0xFFFF) == UsedRegMask) { |
377 | 0 | if (DstReg != SrcReg) |
378 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) |
379 | 0 | .addReg(SrcReg) |
380 | 0 | .addImm(UsedRegMask); |
381 | 0 | else |
382 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) |
383 | 0 | .addReg(SrcReg, RegState::Kill) |
384 | 0 | .addImm(UsedRegMask); |
385 | 0 | } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { |
386 | 0 | if (DstReg != SrcReg) |
387 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) |
388 | 0 | .addReg(SrcReg) |
389 | 0 | .addImm(UsedRegMask >> 16); |
390 | 0 | else |
391 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) |
392 | 0 | .addReg(SrcReg, RegState::Kill) |
393 | 0 | .addImm(UsedRegMask >> 16); |
394 | 0 | } else { |
395 | 0 | if (DstReg != SrcReg) |
396 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) |
397 | 0 | .addReg(SrcReg) |
398 | 0 | .addImm(UsedRegMask >> 16); |
399 | 0 | else |
400 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) |
401 | 0 | .addReg(SrcReg, RegState::Kill) |
402 | 0 | .addImm(UsedRegMask >> 16); |
403 | 0 |
|
404 | 0 | BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) |
405 | 0 | .addReg(DstReg, RegState::Kill) |
406 | 0 | .addImm(UsedRegMask & 0xFFFF); |
407 | 0 | } |
408 | 0 |
|
409 | 0 | // Remove the old UPDATE_VRSAVE instruction. |
410 | 0 | MI.eraseFromParent(); |
411 | 0 | } |
412 | | |
413 | 11.9k | static bool spillsCR(const MachineFunction &MF) { |
414 | 11.9k | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
415 | 11.9k | return FuncInfo->isCRSpilled(); |
416 | 11.9k | } |
417 | | |
418 | 11.3k | static bool spillsVRSAVE(const MachineFunction &MF) { |
419 | 11.3k | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
420 | 11.3k | return FuncInfo->isVRSAVESpilled(); |
421 | 11.3k | } |
422 | | |
423 | 11.1k | static bool hasSpills(const MachineFunction &MF) { |
424 | 11.1k | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
425 | 11.1k | return FuncInfo->hasSpills(); |
426 | 11.1k | } |
427 | | |
428 | 11.2k | static bool hasNonRISpills(const MachineFunction &MF) { |
429 | 11.2k | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
430 | 11.2k | return FuncInfo->hasNonRISpills(); |
431 | 11.2k | } |
432 | | |
433 | | /// MustSaveLR - Return true if this function requires that we save the LR |
434 | | /// register onto the stack in the prolog and restore it in the epilog of the |
435 | | /// function. |
436 | 44.5k | static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { |
437 | 44.5k | const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); |
438 | 44.5k | |
439 | 44.5k | // We need a save/restore of LR if there is any def of LR (which is |
440 | 44.5k | // defined by calls, including the PIC setup sequence), or if there is |
441 | 44.5k | // some use of the LR stack slot (e.g. for builtin_return_address). |
442 | 44.5k | // (LR comes in 32 and 64 bit versions.) |
443 | 44.5k | MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); |
444 | 44.5k | return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired()41.6k ; |
445 | 44.5k | } |
446 | | |
447 | | /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum |
448 | | /// call frame size. Update the MachineFunction object with the stack size. |
449 | | unsigned |
450 | | PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, |
451 | 11.2k | bool UseEstimate) const { |
452 | 11.2k | unsigned NewMaxCallFrameSize = 0; |
453 | 11.2k | unsigned FrameSize = determineFrameLayout(MF, UseEstimate, |
454 | 11.2k | &NewMaxCallFrameSize); |
455 | 11.2k | MF.getFrameInfo().setStackSize(FrameSize); |
456 | 11.2k | MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); |
457 | 11.2k | return FrameSize; |
458 | 11.2k | } |
459 | | |
460 | | /// determineFrameLayout - Determine the size of the frame and maximum call |
461 | | /// frame size. |
462 | | unsigned |
463 | | PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, |
464 | | bool UseEstimate, |
465 | 37.0k | unsigned *NewMaxCallFrameSize) const { |
466 | 37.0k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
467 | 37.0k | const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
468 | 37.0k | |
469 | 37.0k | // Get the number of bytes to allocate from the FrameInfo |
470 | 37.0k | unsigned FrameSize = |
471 | 37.0k | UseEstimate ? MFI.estimateStackSize(MF)14.3k : MFI.getStackSize()22.6k ; |
472 | 37.0k | |
473 | 37.0k | // Get stack alignments. The frame must be aligned to the greatest of these: |
474 | 37.0k | unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI |
475 | 37.0k | unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame |
476 | 37.0k | unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; |
477 | 37.0k | |
478 | 37.0k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
479 | 37.0k | |
480 | 37.0k | unsigned LR = RegInfo->getRARegister(); |
481 | 37.0k | bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); |
482 | 37.0k | bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. |
483 | 37.0k | !MFI.adjustsStack()36.9k && // No calls. |
484 | 37.0k | !MustSaveLR(MF, LR)33.1k && // No need to save LR. |
485 | 37.0k | !FI->mustSaveTOC()31.5k && // No need to save TOC. |
486 | 37.0k | !RegInfo->hasBasePointer(MF)31.5k ; // No special alignment. |
487 | 37.0k | |
488 | 37.0k | // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless |
489 | 37.0k | // code if all local vars are reg-allocated. |
490 | 37.0k | bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); |
491 | 37.0k | |
492 | 37.0k | // Check whether we can skip adjusting the stack pointer (by using red zone) |
493 | 37.0k | if (!DisableRedZone && CanUseRedZone && FitsInRedZone31.5k ) { |
494 | 30.7k | // No need for frame |
495 | 30.7k | return 0; |
496 | 30.7k | } |
497 | 6.25k | |
498 | 6.25k | // Get the maximum call frame size of all the calls. |
499 | 6.25k | unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); |
500 | 6.25k | |
501 | 6.25k | // Maximum call frame needs to be at least big enough for linkage area. |
502 | 6.25k | unsigned minCallFrameSize = getLinkageSize(); |
503 | 6.25k | maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); |
504 | 6.25k | |
505 | 6.25k | // If we have dynamic alloca then maxCallFrameSize needs to be aligned so |
506 | 6.25k | // that allocations will be aligned. |
507 | 6.25k | if (MFI.hasVarSizedObjects()) |
508 | 61 | maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; |
509 | 6.25k | |
510 | 6.25k | // Update the new max call frame size if the caller passes in a valid pointer. |
511 | 6.25k | if (NewMaxCallFrameSize) |
512 | 1.46k | *NewMaxCallFrameSize = maxCallFrameSize; |
513 | 6.25k | |
514 | 6.25k | // Include call frame size in total. |
515 | 6.25k | FrameSize += maxCallFrameSize; |
516 | 6.25k | |
517 | 6.25k | // Make sure the frame is aligned. |
518 | 6.25k | FrameSize = (FrameSize + AlignMask) & ~AlignMask; |
519 | 6.25k | |
520 | 6.25k | return FrameSize; |
521 | 6.25k | } |
522 | | |
523 | | // hasFP - Return true if the specified function actually has a dedicated frame |
524 | | // pointer register. |
525 | 75.1k | bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { |
526 | 75.1k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
527 | 75.1k | // FIXME: This is pretty much broken by design: hasFP() might be called really |
528 | 75.1k | // early, before the stack layout was calculated and thus hasFP() might return |
529 | 75.1k | // true or false here depending on the time of call. |
530 | 75.1k | return (MFI.getStackSize()) && needsFP(MF)13.5k ; |
531 | 75.1k | } |
532 | | |
533 | | // needsFP - Return true if the specified function should have a dedicated frame |
534 | | // pointer register. This is true if the function has variable sized allocas or |
535 | | // if frame pointer elimination is disabled. |
536 | 81.8k | bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { |
537 | 81.8k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
538 | 81.8k | |
539 | 81.8k | // Naked functions have no stack frame pushed, so we don't have a frame |
540 | 81.8k | // pointer. |
541 | 81.8k | if (MF.getFunction().hasFnAttribute(Attribute::Naked)) |
542 | 6 | return false; |
543 | 81.8k | |
544 | 81.8k | return MF.getTarget().Options.DisableFramePointerElim(MF) || |
545 | 81.8k | MFI.hasVarSizedObjects()80.5k || MFI.hasStackMap()80.3k || MFI.hasPatchPoint()80.0k || |
546 | 81.8k | (79.4k MF.getTarget().Options.GuaranteedTailCallOpt79.4k && |
547 | 79.4k | MF.getInfo<PPCFunctionInfo>()->hasFastCall()58 ); |
548 | 81.8k | } |
549 | | |
550 | 11 | void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { |
551 | 11 | bool is31 = needsFP(MF); |
552 | 11 | unsigned FPReg = is31 ? PPC::R315 : PPC::R16 ; |
553 | 11 | unsigned FP8Reg = is31 ? PPC::X315 : PPC::X16 ; |
554 | 11 | |
555 | 11 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
556 | 11 | bool HasBP = RegInfo->hasBasePointer(MF); |
557 | 11 | unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF)2 : FPReg9 ; |
558 | 11 | unsigned BP8Reg = HasBP ? (unsigned) PPC::X302 : FP8Reg9 ; |
559 | 11 | |
560 | 11 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); |
561 | 51 | BI != BE; ++BI40 ) |
562 | 614 | for (MachineBasicBlock::iterator MBBI = BI->end(); 40 MBBI != BI->begin(); ) { |
563 | 574 | --MBBI; |
564 | 2.34k | for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I1.76k ) { |
565 | 1.76k | MachineOperand &MO = MBBI->getOperand(I); |
566 | 1.76k | if (!MO.isReg()) |
567 | 1.02k | continue; |
568 | 744 | |
569 | 744 | switch (MO.getReg()) { |
570 | 744 | case PPC::FP: |
571 | 4 | MO.setReg(FPReg); |
572 | 4 | break; |
573 | 744 | case PPC::FP8: |
574 | 7 | MO.setReg(FP8Reg); |
575 | 7 | break; |
576 | 744 | case PPC::BP: |
577 | 0 | MO.setReg(BPReg); |
578 | 0 | break; |
579 | 744 | case PPC::BP8: |
580 | 5 | MO.setReg(BP8Reg); |
581 | 5 | break; |
582 | 744 | |
583 | 744 | } |
584 | 744 | } |
585 | 574 | } |
586 | 11 | } |
587 | | |
588 | | /* This function will do the following: |
589 | | - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 |
590 | | respectively (defaults recommended by the ABI) and return true |
591 | | - If MBB is not an entry block, initialize the register scavenger and look |
592 | | for available registers. |
593 | | - If the defaults (R0/R12) are available, return true |
594 | | - If TwoUniqueRegsRequired is set to true, it looks for two unique |
595 | | registers. Otherwise, look for a single available register. |
596 | | - If the required registers are found, set SR1 and SR2 and return true. |
597 | | - If the required registers are not found, set SR2 or both SR1 and SR2 to |
598 | | PPC::NoRegister and return false. |
599 | | |
600 | | Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired |
601 | | is not set, this function will attempt to find two different registers, but |
602 | | still return true if only one register is available (and set SR1 == SR2). |
603 | | */ |
604 | | bool |
605 | | PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, |
606 | | bool UseAtEnd, |
607 | | bool TwoUniqueRegsRequired, |
608 | | unsigned *SR1, |
609 | 22.8k | unsigned *SR2) const { |
610 | 22.8k | RegScavenger RS; |
611 | 22.8k | unsigned R0 = Subtarget.isPPC64() ? PPC::X020.7k : PPC::R02.10k ; |
612 | 22.8k | unsigned R12 = Subtarget.isPPC64() ? PPC::X1220.7k : PPC::R122.10k ; |
613 | 22.8k | |
614 | 22.8k | // Set the defaults for the two scratch registers. |
615 | 22.8k | if (SR1) |
616 | 22.6k | *SR1 = R0; |
617 | 22.8k | |
618 | 22.8k | if (SR2) { |
619 | 22.6k | assert (SR1 && "Asking for the second scratch register but not the first?"); |
620 | 22.6k | *SR2 = R12; |
621 | 22.6k | } |
622 | 22.8k | |
623 | 22.8k | // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. |
624 | 22.8k | if ((UseAtEnd && MBB->isReturnBlock()11.4k ) || |
625 | 22.8k | (11.4k !UseAtEnd11.4k && (&MBB->getParent()->front() == MBB)11.3k )) |
626 | 22.5k | return true; |
627 | 270 | |
628 | 270 | RS.enterBasicBlock(*MBB); |
629 | 270 | |
630 | 270 | if (UseAtEnd && !MBB->empty()82 ) { |
631 | 81 | // The scratch register will be used at the end of the block, so must |
632 | 81 | // consider all registers used within the block |
633 | 81 | |
634 | 81 | MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); |
635 | 81 | // If no terminator, back iterator up to previous instruction. |
636 | 81 | if (MBBI == MBB->end()) |
637 | 57 | MBBI = std::prev(MBBI); |
638 | 81 | |
639 | 81 | if (MBBI != MBB->begin()) |
640 | 78 | RS.forward(MBBI); |
641 | 81 | } |
642 | 270 | |
643 | 270 | // If the two registers are available, we're all good. |
644 | 270 | // Note that we only return here if both R0 and R12 are available because |
645 | 270 | // although the function may not require two unique registers, it may benefit |
646 | 270 | // from having two so we should try to provide them. |
647 | 270 | if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) |
648 | 270 | return true; |
649 | 0 | |
650 | 0 | // Get the list of callee-saved registers for the target. |
651 | 0 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
652 | 0 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); |
653 | 0 |
|
654 | 0 | // Get all the available registers in the block. |
655 | 0 | BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : |
656 | 0 | &PPC::GPRCRegClass); |
657 | 0 |
|
658 | 0 | // We shouldn't use callee-saved registers as scratch registers as they may be |
659 | 0 | // available when looking for a candidate block for shrink wrapping but not |
660 | 0 | // available when the actual prologue/epilogue is being emitted because they |
661 | 0 | // were added as live-in to the prologue block by PrologueEpilogueInserter. |
662 | 0 | for (int i = 0; CSRegs[i]; ++i) |
663 | 0 | BV.reset(CSRegs[i]); |
664 | 0 |
|
665 | 0 | // Set the first scratch register to the first available one. |
666 | 0 | if (SR1) { |
667 | 0 | int FirstScratchReg = BV.find_first(); |
668 | 0 | *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; |
669 | 0 | } |
670 | 0 |
|
671 | 0 | // If there is another one available, set the second scratch register to that. |
672 | 0 | // Otherwise, set it to either PPC::NoRegister if this function requires two |
673 | 0 | // or to whatever SR1 is set to if this function doesn't require two. |
674 | 0 | if (SR2) { |
675 | 0 | int SecondScratchReg = BV.find_next(*SR1); |
676 | 0 | if (SecondScratchReg != -1) |
677 | 0 | *SR2 = SecondScratchReg; |
678 | 0 | else |
679 | 0 | *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; |
680 | 0 | } |
681 | 0 |
|
682 | 0 | // Now that we've done our best to provide both registers, double check |
683 | 0 | // whether we were unable to provide enough. |
684 | 0 | if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) |
685 | 0 | return false; |
686 | 0 | |
687 | 0 | return true; |
688 | 0 | } |
689 | | |
690 | | // We need a scratch register for spilling LR and for spilling CR. By default, |
691 | | // we use two scratch registers to hide latency. If only one scratch register |
692 | | // is available, we can adjust for that by not overlapping the spill code. |
693 | | // However, two distinct scratch registers are required when the function has |
694 | | // a base pointer, MaxAlign > 1, and the frame is large or has no red zone. |
695 | | bool |
696 | 11.3k | PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { |
697 | 11.3k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
698 | 11.3k | MachineFunction &MF = *(MBB->getParent()); |
699 | 11.3k | bool HasBP = RegInfo->hasBasePointer(MF); |
700 | 11.3k | unsigned FrameSize = determineFrameLayout(MF); |
701 | 11.3k | int NegFrameSize = -FrameSize; |
702 | 11.3k | bool IsLargeFrame = !isInt<16>(NegFrameSize); |
703 | 11.3k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
704 | 11.3k | unsigned MaxAlign = MFI.getMaxAlignment(); |
705 | 11.3k | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI()1.04k ; |
706 | 11.3k | // A frame counts as large when -FrameSize does not fit in a signed 16-bit value. |
707 | 11.3k | return (IsLargeFrame || !HasRedZone11.3k ) && HasBP1.05k && MaxAlign > 116 ; |
708 | 11.3k | } |
709 | | |
710 | 94 | bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
711 | 94 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
712 | 94 | // MBB can host the prologue iff findScratchRegister succeeds for it under the block's two-unique-register requirement. |
713 | 94 | return findScratchRegister(TmpMBB, false, |
714 | 94 | twoUniqueScratchRegsRequired(TmpMBB)); |
715 | 94 | } |
716 | | |
717 | 94 | bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
718 | 94 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
719 | 94 | // MBB can host the epilogue iff findScratchRegister succeeds with its second argument true (canUseAsPrologue passes false). |
720 | 94 | return findScratchRegister(TmpMBB, true); |
721 | 94 | } |
722 | | |
723 | 22.6k | bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { |
724 | 22.6k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
725 | 22.6k | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
726 | 22.6k | // Returns true only if every check below passes; any failing check means the stack update must stay where it is. |
727 | 22.6k | // Bail out if either the register info or the function info is missing. |
728 | 22.6k | if (!RegInfo || !FI) |
729 | 0 | return false; |
730 | 22.6k | 
731 | 22.6k | // Only move the stack update when targeting both the ELFv2 ABI and PPC64. |
732 | 22.6k | if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()10.7k ) |
733 | 11.9k | return false; |
734 | 10.7k | 
735 | 10.7k | // Check the frame size first and return false if it does not meet the |
736 | 10.7k | // requirements. |
737 | 10.7k | // We need a non-zero frame size as well as a frame that fits in the red |
738 | 10.7k | // zone. By moving the stack pointer update we end up storing to the red |
739 | 10.7k | // zone until the stack pointer is updated. If an interrupt arrives inside |
740 | 10.7k | // the prologue but before the stack update, a number of stores have already |
741 | 10.7k | // gone to the red zone, and those stores must all fit within it. |
742 | 10.7k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
743 | 10.7k | unsigned FrameSize = MFI.getStackSize(); |
744 | 10.7k | if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()1.17k ) |
745 | 9.56k | return false; |
746 | 1.13k | 
747 | 1.13k | // Frame pointers and base pointers complicate matters, so do nothing if |
748 | 1.13k | // we have either. For example, having a frame pointer will sometimes require |
749 | 1.13k | // a copy of r1 into r31, which makes keeping track of updates to r1 more |
750 | 1.13k | // difficult. |
751 | 1.13k | if (hasFP(MF) || RegInfo->hasBasePointer(MF)1.10k ) |
752 | 38 | return false; |
753 | 1.10k | 
754 | 1.10k | // Calls to fast_cc functions pass parameters on the stack using rules that |
755 | 1.10k | // differ from the ABI, and using the PIC base in the function imposes |
756 | 1.10k | // restrictions similar to using the base pointer. It is not generally safe |
757 | 1.10k | // to move the stack pointer update in these situations. |
758 | 1.10k | if (FI->hasFastCall() || FI->usesPICBase()) |
759 | 0 | return false; |
760 | 1.10k | 
761 | 1.10k | // Finally, we can move the stack update only if we do not require register |
762 | 1.10k | // scavenging. Register scavenging can introduce more spills and so |
763 | 1.10k | // may make the frame size larger than we have computed. |
764 | 1.10k | return !RegInfo->requiresFrameIndexScavenging(MF); |
765 | 1.10k | } |
766 | | |
767 | | void PPCFrameLowering::emitPrologue(MachineFunction &MF, |
768 | 11.2k | MachineBasicBlock &MBB) const { |
769 | 11.2k | MachineBasicBlock::iterator MBBI = MBB.begin(); |
770 | 11.2k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
771 | 11.2k | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
772 | 11.2k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
773 | 11.2k | // Emits the prologue for MF into MBB: saves LR/CR/FP/BP/PIC base/TOC as needed, allocates the frame, and emits CFI. |
774 | 11.2k | MachineModuleInfo &MMI = MF.getMMI(); |
775 | 11.2k | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
776 | 11.2k | DebugLoc dl; |
777 | 11.2k | bool needsCFI = MMI.hasDebugInfo() || |
778 | 11.2k | MF.getFunction().needsUnwindTableEntry()11.2k ; |
779 | 11.2k | 
780 | 11.2k | // Get processor type. |
781 | 11.2k | bool isPPC64 = Subtarget.isPPC64(); |
782 | 11.2k | // Get the ABI. |
783 | 11.2k | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
784 | 11.2k | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
785 | 11.2k | assert((Subtarget.isDarwinABI() || isSVR4ABI) && |
786 | 11.2k | "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); |
787 | 11.2k | 
788 | 11.2k | // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, |
789 | 11.2k | // process it. |
790 | 11.2k | if (!isSVR4ABI) |
791 | 0 | for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { |
792 | 0 | if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { |
793 | 0 | HandleVRSaveUpdate(*MBBI, TII); |
794 | 0 | break; |
795 | 0 | } |
796 | 0 | } |
797 | 11.2k | // NOTE(review): the counter i in the loop above is incremented but never read. |
798 | 11.2k | // Move MBBI back to the beginning of the prologue block. |
799 | 11.2k | MBBI = MBB.begin(); |
800 | 11.2k | 
801 | 11.2k | // Work out frame sizes. |
802 | 11.2k | unsigned FrameSize = determineFrameLayoutAndUpdate(MF); |
803 | 11.2k | int NegFrameSize = -FrameSize; |
804 | 11.2k | if (!isInt<32>(NegFrameSize)) |
805 | 11.2k | llvm_unreachable0 ("Unhandled stack size!"); |
806 | 11.2k | 
807 | 11.2k | if (MFI.isFrameAddressTaken()) |
808 | 11 | replaceFPWithRealFP(MF); |
809 | 11.2k | 
810 | 11.2k | // Check if the link register (LR) must be saved. |
811 | 11.2k | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
812 | 11.2k | bool MustSaveLR = FI->mustSaveLR(); |
813 | 11.2k | bool MustSaveTOC = FI->mustSaveTOC(); |
814 | 11.2k | const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); |
815 | 11.2k | bool MustSaveCR = !MustSaveCRs.empty(); |
816 | 11.2k | // Do we have a frame pointer and/or base pointer for this function? |
817 | 11.2k | bool HasFP = hasFP(MF); |
818 | 11.2k | bool HasBP = RegInfo->hasBasePointer(MF); |
819 | 11.2k | bool HasRedZone = isPPC64 || !isSVR4ABI1.04k ; |
820 | 11.2k | 
821 | 11.2k | unsigned SPReg = isPPC64 ? PPC::X110.2k : PPC::R11.04k ; |
822 | 11.2k | unsigned BPReg = RegInfo->getBaseRegister(MF); |
823 | 11.2k | unsigned FPReg = isPPC64 ? PPC::X3110.2k : PPC::R311.04k ; |
824 | 11.2k | unsigned LRReg = isPPC64 ? PPC::LR810.2k : PPC::LR1.04k ; |
825 | 11.2k | unsigned TOCReg = isPPC64 ? PPC::X210.2k : PPC::R21.04k ; |
826 | 11.2k | unsigned ScratchReg = 0; |
827 | 11.2k | unsigned TempReg = isPPC64 ? PPC::X1210.2k : PPC::R121.04k ; // another scratch reg |
828 | 11.2k | // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) |
829 | 11.2k | const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR810.2k |
830 | 11.2k | : PPC::MFLR1.04k ); |
831 | 11.2k | const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD10.2k |
832 | 11.2k | : PPC::STW1.04k ); |
833 | 11.2k | const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU10.2k |
834 | 11.2k | : PPC::STWU1.04k ); |
835 | 11.2k | const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX10.2k |
836 | 11.2k | : PPC::STWUX1.04k ); |
837 | 11.2k | const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS810.2k |
838 | 11.2k | : PPC::LIS1.04k ); |
839 | 11.2k | const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI810.2k |
840 | 11.2k | : PPC::ORI1.04k ); |
841 | 11.2k | const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR810.2k |
842 | 11.2k | : PPC::OR1.04k ); |
843 | 11.2k | const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC810.2k |
844 | 11.2k | : PPC::SUBFC1.04k ); |
845 | 11.2k | const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC810.2k |
846 | 11.2k | : PPC::SUBFIC1.04k ); |
847 | 11.2k | 
848 | 11.2k | // Regarding this assert: Even though LR is saved in the caller's frame (i.e., |
849 | 11.2k | // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no |
850 | 11.2k | // Red Zone, an asynchronous event (a form of "callee") could claim a frame & |
851 | 11.2k | // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. |
852 | 11.2k | assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && |
853 | 11.2k | "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); |
854 | 11.2k | 
855 | 11.2k | // Hold the lookup result in the bool that is reassigned below; this suppresses compiler warnings. |
856 | 11.2k | bool SingleScratchReg = |
857 | 11.2k | findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), |
858 | 11.2k | &ScratchReg, &TempReg); |
859 | 11.2k | assert(SingleScratchReg && |
860 | 11.2k | "Required number of registers not available in this block"); |
861 | 11.2k | // From here on, SingleScratchReg means both scratch registers are the same register. |
862 | 11.2k | SingleScratchReg = ScratchReg == TempReg; |
863 | 11.2k | 
864 | 11.2k | int LROffset = getReturnSaveOffset(); |
865 | 11.2k | 
866 | 11.2k | int FPOffset = 0; |
867 | 11.2k | if (HasFP) { |
868 | 119 | if (isSVR4ABI) { |
869 | 119 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
870 | 119 | int FPIndex = FI->getFramePointerSaveIndex(); |
871 | 119 | assert(FPIndex && "No Frame Pointer Save Slot!"); |
872 | 119 | FPOffset = MFI.getObjectOffset(FPIndex); |
873 | 119 | } else { |
874 | 0 | FPOffset = getFramePointerSaveOffset(); |
875 | 0 | } |
876 | 119 | } |
877 | 11.2k | 
878 | 11.2k | int BPOffset = 0; |
879 | 11.2k | if (HasBP) { |
880 | 26 | if (isSVR4ABI) { |
881 | 26 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
882 | 26 | int BPIndex = FI->getBasePointerSaveIndex(); |
883 | 26 | assert(BPIndex && "No Base Pointer Save Slot!"); |
884 | 26 | BPOffset = MFI.getObjectOffset(BPIndex); |
885 | 26 | } else { |
886 | 0 | BPOffset = getBasePointerSaveOffset(); |
887 | 0 | } |
888 | 26 | } |
889 | 11.2k | 
890 | 11.2k | int PBPOffset = 0; |
891 | 11.2k | if (FI->usesPICBase()) { |
892 | 23 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
893 | 23 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
894 | 23 | assert(PBPIndex && "No PIC Base Pointer Save Slot!"); |
895 | 23 | PBPOffset = MFI.getObjectOffset(PBPIndex); |
896 | 23 | } |
897 | 11.2k | 
898 | 11.2k | // Get stack alignments. |
899 | 11.2k | unsigned MaxAlign = MFI.getMaxAlignment(); |
900 | 11.2k | if (HasBP && MaxAlign > 126 ) |
901 | 11.2k | assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && |
902 | 11.2k | "Invalid alignment!"); |
903 | 11.2k | // NOTE(review): the if above has the assert as its entire body, so it guards a no-op wherever assert expands to nothing. |
904 | 11.2k | // Frames of 32KB & larger require special handling because they cannot be |
905 | 11.2k | // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. |
906 | 11.2k | bool isLargeFrame = !isInt<16>(NegFrameSize); |
907 | 11.2k | 
908 | 11.2k | assert((isPPC64 || !MustSaveCR) && |
909 | 11.2k | "Prologue CR saving supported only in 64-bit mode"); |
910 | 11.2k | 
911 | 11.2k | // Check if we can move the stack update instruction (stdu) down the prologue |
912 | 11.2k | // past the callee saves. Hopefully this will avoid the situation where the |
913 | 11.2k | // saves are waiting for the update on the store with update to complete. |
914 | 11.2k | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
915 | 11.2k | bool MovingStackUpdateDown = false; |
916 | 11.2k | 
917 | 11.2k | // Check if we can move the stack update. |
918 | 11.2k | if (stackUpdateCanBeMoved(MF)) { |
919 | 502 | const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); |
920 | 502 | for (CalleeSavedInfo CSI : Info) { |
921 | 339 | int FrIdx = CSI.getFrameIdx(); |
922 | 339 | // If the frame index is not negative the callee saved info belongs to a |
923 | 339 | // stack object that is not a fixed stack object. We ignore non-fixed |
924 | 339 | // stack objects because we won't move the stack update pointer past them. |
925 | 339 | if (FrIdx >= 0) |
926 | 13 | continue; |
927 | 326 | 
928 | 326 | if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { |
929 | 326 | StackUpdateLoc++; |
930 | 326 | MovingStackUpdateDown = true; |
931 | 326 | } else { |
932 | 0 | // We need all of the Frame Indices to meet these conditions. |
933 | 0 | // If they do not, abort the whole operation. |
934 | 0 | StackUpdateLoc = MBBI; |
935 | 0 | MovingStackUpdateDown = false; |
936 | 0 | break; |
937 | 0 | } |
938 | 326 | } |
939 | 502 | // NOTE(review): the loop above and the loop below each copy every CalleeSavedInfo by value. |
940 | 502 | // If the operation was not aborted then update the object offset. |
941 | 502 | if (MovingStackUpdateDown) { |
942 | 332 | for (CalleeSavedInfo CSI : Info) { |
943 | 332 | int FrIdx = CSI.getFrameIdx(); |
944 | 332 | if (FrIdx < 0) |
945 | 326 | MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); |
946 | 332 | } |
947 | 116 | } |
948 | 502 | } |
949 | 11.2k | 
950 | 11.2k | // If we need to spill the CR and the LR but we don't have two separate |
951 | 11.2k | // registers available, we must spill them one at a time |
952 | 11.2k | if (MustSaveCR && SingleScratchReg34 && MustSaveLR0 ) { |
953 | 0 | // In the ELFv2 ABI, we are not required to save all CR fields. |
954 | 0 | // When exactly one CR field must be saved, it is more efficient to use |
955 | 0 | // mfocrf to selectively save just that field, because mfocrf has shorter |
956 | 0 | // latency than mfcr. |
957 | 0 | unsigned MfcrOpcode = PPC::MFCR8; |
958 | 0 | unsigned CrState = RegState::ImplicitKill; |
959 | 0 | if (isELFv2ABI && MustSaveCRs.size() == 1) { |
960 | 0 | MfcrOpcode = PPC::MFOCRF8; |
961 | 0 | CrState = RegState::Kill; |
962 | 0 | } |
963 | 0 | MachineInstrBuilder MIB = |
964 | 0 | BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); |
965 | 0 | for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) |
966 | 0 | MIB.addReg(MustSaveCRs[i], CrState); |
967 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) |
968 | 0 | .addReg(TempReg, getKillRegState(true)) |
969 | 0 | .addImm(8) |
970 | 0 | .addReg(SPReg); |
971 | 0 | } |
972 | 11.2k | 
973 | 11.2k | if (MustSaveLR) |
974 | 1.28k | BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); |
975 | 11.2k | 
976 | 11.2k | if (MustSaveCR && |
977 | 11.2k | !(34 SingleScratchReg34 && MustSaveLR0 )) { // will only occur for PPC64 |
978 | 34 | // In the ELFv2 ABI, we are not required to save all CR fields. |
979 | 34 | // When exactly one CR field must be saved, it is more efficient to use |
980 | 34 | // mfocrf to selectively save just that field, because mfocrf has shorter |
981 | 34 | // latency than mfcr. |
982 | 34 | unsigned MfcrOpcode = PPC::MFCR8; |
983 | 34 | unsigned CrState = RegState::ImplicitKill; |
984 | 34 | if (isELFv2ABI && MustSaveCRs.size() == 111 ) { |
985 | 4 | MfcrOpcode = PPC::MFOCRF8; |
986 | 4 | CrState = RegState::Kill; |
987 | 4 | } |
988 | 34 | MachineInstrBuilder MIB = |
989 | 34 | BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); |
990 | 114 | for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i80 ) |
991 | 80 | MIB.addReg(MustSaveCRs[i], CrState); |
992 | 34 | } |
993 | 11.2k | 
994 | 11.2k | if (HasRedZone) { |
995 | 10.2k | if (HasFP) |
996 | 81 | BuildMI(MBB, MBBI, dl, StoreInst) |
997 | 81 | .addReg(FPReg) |
998 | 81 | .addImm(FPOffset) |
999 | 81 | .addReg(SPReg); |
1000 | 10.2k | if (FI->usesPICBase()) |
1001 | 0 | BuildMI(MBB, MBBI, dl, StoreInst) |
1002 | 0 | .addReg(PPC::R30) |
1003 | 0 | .addImm(PBPOffset) |
1004 | 0 | .addReg(SPReg); |
1005 | 10.2k | if (HasBP) |
1006 | 13 | BuildMI(MBB, MBBI, dl, StoreInst) |
1007 | 13 | .addReg(BPReg) |
1008 | 13 | .addImm(BPOffset) |
1009 | 13 | .addReg(SPReg); |
1010 | 10.2k | } |
1011 | 11.2k | 
1012 | 11.2k | if (MustSaveLR) |
1013 | 1.28k | BuildMI(MBB, StackUpdateLoc, dl, StoreInst) |
1014 | 1.28k | .addReg(ScratchReg, getKillRegState(true)) |
1015 | 1.28k | .addImm(LROffset) |
1016 | 1.28k | .addReg(SPReg); |
1017 | 11.2k | 
1018 | 11.2k | if (MustSaveCR && |
1019 | 11.2k | !(34 SingleScratchReg34 && MustSaveLR0 )) { // will only occur for PPC64 |
1020 | 34 | assert(HasRedZone && "A red zone is always available on PPC64"); |
1021 | 34 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) |
1022 | 34 | .addReg(TempReg, getKillRegState(true)) |
1023 | 34 | .addImm(8) |
1024 | 34 | .addReg(SPReg); |
1025 | 34 | } |
1026 | 11.2k | 
1027 | 11.2k | // Skip the rest if this is a leaf function & all spills fit in the Red Zone. |
1028 | 11.2k | if (!FrameSize) |
1029 | 9.81k | return; |
1030 | 1.46k | 
1031 | 1.46k | // Adjust stack pointer: r1 += NegFrameSize. |
1032 | 1.46k | // If there is a preferred stack alignment, align R1 now |
1033 | 1.46k | 
1034 | 1.46k | if (HasBP && HasRedZone26 ) { |
1035 | 13 | // Save a copy of r1 as the base pointer. |
1036 | 13 | BuildMI(MBB, MBBI, dl, OrInst, BPReg) |
1037 | 13 | .addReg(SPReg) |
1038 | 13 | .addReg(SPReg); |
1039 | 13 | } |
1040 | 1.46k | 
1041 | 1.46k | // Have we generated a STUX instruction to claim stack frame? If so, |
1042 | 1.46k | // the negated frame size will be placed in ScratchReg. |
1043 | 1.46k | bool HasSTUX = false; |
1044 | 1.46k | 
1045 | 1.46k | // This condition must be kept in sync with canUseAsPrologue. |
1046 | 1.46k | if (HasBP && MaxAlign > 126 ) { |
1047 | 25 | if (isPPC64) |
1048 | 12 | BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) |
1049 | 12 | .addReg(SPReg) |
1050 | 12 | .addImm(0) |
1051 | 12 | .addImm(64 - Log2_32(MaxAlign)); |
1052 | 13 | else // PPC32... |
1053 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) |
1054 | 13 | .addReg(SPReg) |
1055 | 13 | .addImm(0) |
1056 | 13 | .addImm(32 - Log2_32(MaxAlign)) |
1057 | 13 | .addImm(31); |
1058 | 25 | if (!isLargeFrame) { |
1059 | 18 | BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) |
1060 | 18 | .addReg(ScratchReg, RegState::Kill) |
1061 | 18 | .addImm(NegFrameSize); |
1062 | 18 | } else { |
1063 | 7 | assert(!SingleScratchReg && "Only a single scratch reg available"); |
1064 | 7 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) |
1065 | 7 | .addImm(NegFrameSize >> 16); |
1066 | 7 | BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) |
1067 | 7 | .addReg(TempReg, RegState::Kill) |
1068 | 7 | .addImm(NegFrameSize & 0xFFFF); |
1069 | 7 | BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) |
1070 | 7 | .addReg(ScratchReg, RegState::Kill) |
1071 | 7 | .addReg(TempReg, RegState::Kill); |
1072 | 7 | } |
1073 | 25 | 
1074 | 25 | BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) |
1075 | 25 | .addReg(SPReg, RegState::Kill) |
1076 | 25 | .addReg(SPReg) |
1077 | 25 | .addReg(ScratchReg); |
1078 | 25 | HasSTUX = true; |
1079 | 25 | 
1080 | 1.44k | } else if (!isLargeFrame) { |
1081 | 1.42k | BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) |
1082 | 1.42k | .addReg(SPReg) |
1083 | 1.42k | .addImm(NegFrameSize) |
1084 | 1.42k | .addReg(SPReg); |
1085 | 1.42k | 
1086 | 1.42k | } else { |
1087 | 14 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
1088 | 14 | .addImm(NegFrameSize >> 16); |
1089 | 14 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
1090 | 14 | .addReg(ScratchReg, RegState::Kill) |
1091 | 14 | .addImm(NegFrameSize & 0xFFFF); |
1092 | 14 | BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) |
1093 | 14 | .addReg(SPReg, RegState::Kill) |
1094 | 14 | .addReg(SPReg) |
1095 | 14 | .addReg(ScratchReg); |
1096 | 14 | HasSTUX = true; |
1097 | 14 | } |
1098 | 1.46k | 
1099 | 1.46k | // Save the TOC register after the stack pointer update if a prologue TOC |
1100 | 1.46k | // save is required for the function. |
1101 | 1.46k | if (MustSaveTOC) { |
1102 | 11 | assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); |
1103 | 11 | BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) |
1104 | 11 | .addReg(TOCReg, getKillRegState(true)) |
1105 | 11 | .addImm(TOCSaveOffset) |
1106 | 11 | .addReg(SPReg); |
1107 | 11 | } |
1108 | 1.46k | 
1109 | 1.46k | if (!HasRedZone) { |
1110 | 396 | assert(!isPPC64 && "A red zone is always available on PPC64"); |
1111 | 396 | if (HasSTUX) { |
1112 | 20 | // The negated frame size is in ScratchReg, and the SPReg has been |
1113 | 20 | // decremented by the frame size: SPReg = old SPReg + ScratchReg. |
1114 | 20 | // Since FPOffset, PBPOffset, etc. are relative to the beginning of |
1115 | 20 | // the stack frame (i.e. the old SP), ideally, we would put the old |
1116 | 20 | // SP into a register and use it as the base for the stores. The |
1117 | 20 | // problem is that the only available register may be ScratchReg, |
1118 | 20 | // which could be R0, and R0 cannot be used as a base address. |
1119 | 20 | 
1120 | 20 | // First, set ScratchReg to the old SP. This may need to be modified |
1121 | 20 | // later. |
1122 | 20 | BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) |
1123 | 20 | .addReg(ScratchReg, RegState::Kill) |
1124 | 20 | .addReg(SPReg); |
1125 | 20 | 
1126 | 20 | if (ScratchReg == PPC::R0) { |
1127 | 20 | // R0 cannot be used as a base register, but it can be used as an |
1128 | 20 | // index in a store-indexed. |
1129 | 20 | int LastOffset = 0; |
1130 | 20 | if (HasFP) { |
1131 | 13 | // R0 += (FPOffset-LastOffset). |
1132 | 13 | // Need addic, since addi treats R0 as 0. |
1133 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1134 | 13 | .addReg(ScratchReg) |
1135 | 13 | .addImm(FPOffset-LastOffset); |
1136 | 13 | LastOffset = FPOffset; |
1137 | 13 | // Store FP into *R0. |
1138 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1139 | 13 | .addReg(FPReg, RegState::Kill) // Save FP. |
1140 | 13 | .addReg(PPC::ZERO) |
1141 | 13 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1142 | 13 | } |
1143 | 20 | if (FI->usesPICBase()) { |
1144 | 3 | // R0 += (PBPOffset-LastOffset). |
1145 | 3 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1146 | 3 | .addReg(ScratchReg) |
1147 | 3 | .addImm(PBPOffset-LastOffset); |
1148 | 3 | LastOffset = PBPOffset; |
1149 | 3 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1150 | 3 | .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. |
1151 | 3 | .addReg(PPC::ZERO) |
1152 | 3 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1153 | 3 | } |
1154 | 20 | if (HasBP) { |
1155 | 13 | // R0 += (BPOffset-LastOffset). |
1156 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) |
1157 | 13 | .addReg(ScratchReg) |
1158 | 13 | .addImm(BPOffset-LastOffset); |
1159 | 13 | LastOffset = BPOffset; |
1160 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) |
1161 | 13 | .addReg(BPReg, RegState::Kill) // Save BP. |
1162 | 13 | .addReg(PPC::ZERO) |
1163 | 13 | .addReg(ScratchReg); // This will be the index (R0 is ok here). |
1164 | 13 | // BP = R0-LastOffset |
1165 | 13 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) |
1166 | 13 | .addReg(ScratchReg, RegState::Kill) |
1167 | 13 | .addImm(-LastOffset); |
1168 | 13 | } |
1169 | 20 | } else { |
1170 | 0 | // ScratchReg is not R0, so use it as the base register. It is |
1171 | 0 | // already set to the old SP, so we can use the offsets directly. |
1172 | 0 |
|
1173 | 0 | // Now that the stack frame has been allocated, save all the necessary |
1174 | 0 | // registers using ScratchReg as the base address. |
1175 | 0 | if (HasFP) |
1176 | 0 | BuildMI(MBB, MBBI, dl, StoreInst) |
1177 | 0 | .addReg(FPReg) |
1178 | 0 | .addImm(FPOffset) |
1179 | 0 | .addReg(ScratchReg); |
1180 | 0 | if (FI->usesPICBase()) |
1181 | 0 | BuildMI(MBB, MBBI, dl, StoreInst) |
1182 | 0 | .addReg(PPC::R30) |
1183 | 0 | .addImm(PBPOffset) |
1184 | 0 | .addReg(ScratchReg); |
1185 | 0 | if (HasBP) { |
1186 | 0 | BuildMI(MBB, MBBI, dl, StoreInst) |
1187 | 0 | .addReg(BPReg) |
1188 | 0 | .addImm(BPOffset) |
1189 | 0 | .addReg(ScratchReg); |
1190 | 0 | BuildMI(MBB, MBBI, dl, OrInst, BPReg) |
1191 | 0 | .addReg(ScratchReg, RegState::Kill) |
1192 | 0 | .addReg(ScratchReg); |
1193 | 0 | } |
1194 | 0 | } |
1195 | 376 | } else { |
1196 | 376 | // The frame size is a known 16-bit constant (fitting in the immediate |
1197 | 376 | // field of STWU). To be here we have to be compiling for PPC32. |
1198 | 376 | // Since the SPReg has been decreased by FrameSize, add it back to each |
1199 | 376 | // offset. |
1200 | 376 | if (HasFP) |
1201 | 25 | BuildMI(MBB, MBBI, dl, StoreInst) |
1202 | 25 | .addReg(FPReg) |
1203 | 25 | .addImm(FrameSize + FPOffset) |
1204 | 25 | .addReg(SPReg); |
1205 | 376 | if (FI->usesPICBase()) |
1206 | 20 | BuildMI(MBB, MBBI, dl, StoreInst) |
1207 | 20 | .addReg(PPC::R30) |
1208 | 20 | .addImm(FrameSize + PBPOffset) |
1209 | 20 | .addReg(SPReg); |
1210 | 376 | if (HasBP) { |
1211 | 0 | BuildMI(MBB, MBBI, dl, StoreInst) |
1212 | 0 | .addReg(BPReg) |
1213 | 0 | .addImm(FrameSize + BPOffset) |
1214 | 0 | .addReg(SPReg); |
1215 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) |
1216 | 0 | .addReg(SPReg) |
1217 | 0 | .addImm(FrameSize); |
1218 | 0 | } |
1219 | 376 | } |
1220 | 396 | } |
1221 | 1.46k | 
1222 | 1.46k | // Add Call Frame Information for the instructions we generated above. |
1223 | 1.46k | if (needsCFI) { |
1224 | 997 | unsigned CFIIndex; |
1225 | 997 | 
1226 | 997 | if (HasBP) { |
1227 | 16 | // Define CFA in terms of BP. Do this in preference to using FP/SP, |
1228 | 16 | // because if the stack needed aligning then CFA won't be at a fixed |
1229 | 16 | // offset from FP/SP. |
1230 | 16 | unsigned Reg = MRI->getDwarfRegNum(BPReg, true); |
1231 | 16 | CFIIndex = MF.addFrameInst( |
1232 | 16 | MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); |
1233 | 981 | } else { |
1234 | 981 | // Adjust the definition of CFA to account for the change in SP. |
1235 | 981 | assert(NegFrameSize); |
1236 | 981 | CFIIndex = MF.addFrameInst( |
1237 | 981 | MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); |
1238 | 981 | } |
1239 | 997 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1240 | 997 | .addCFIIndex(CFIIndex); |
1241 | 997 | 
1242 | 997 | if (HasFP) { |
1243 | 80 | // Describe where FP was saved, at a fixed offset from CFA. |
1244 | 80 | unsigned Reg = MRI->getDwarfRegNum(FPReg, true); |
1245 | 80 | CFIIndex = MF.addFrameInst( |
1246 | 80 | MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); |
1247 | 80 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1248 | 80 | .addCFIIndex(CFIIndex); |
1249 | 80 | } |
1250 | 997 | 
1251 | 997 | if (FI->usesPICBase()) { |
1252 | 20 | // Describe where the PIC base pointer (R30) was saved, at a fixed offset from CFA. |
1253 | 20 | unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); |
1254 | 20 | CFIIndex = MF.addFrameInst( |
1255 | 20 | MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); |
1256 | 20 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1257 | 20 | .addCFIIndex(CFIIndex); |
1258 | 20 | } |
1259 | 997 | 
1260 | 997 | if (HasBP) { |
1261 | 16 | // Describe where BP was saved, at a fixed offset from CFA. |
1262 | 16 | unsigned Reg = MRI->getDwarfRegNum(BPReg, true); |
1263 | 16 | CFIIndex = MF.addFrameInst( |
1264 | 16 | MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); |
1265 | 16 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1266 | 16 | .addCFIIndex(CFIIndex); |
1267 | 16 | } |
1268 | 997 | 
1269 | 997 | if (MustSaveLR) { |
1270 | 929 | // Describe where LR was saved, at a fixed offset from CFA. |
1271 | 929 | unsigned Reg = MRI->getDwarfRegNum(LRReg, true); |
1272 | 929 | CFIIndex = MF.addFrameInst( |
1273 | 929 | MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); |
1274 | 929 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1275 | 929 | .addCFIIndex(CFIIndex); |
1276 | 929 | } |
1277 | 997 | } |
1278 | 1.46k | 
1279 | 1.46k | // If there is a frame pointer, copy R1 into R31 |
1280 | 1.46k | if (HasFP) { |
1281 | 119 | BuildMI(MBB, MBBI, dl, OrInst, FPReg) |
1282 | 119 | .addReg(SPReg) |
1283 | 119 | .addReg(SPReg); |
1284 | 119 | 
1285 | 119 | if (!HasBP && needsCFI101 ) { |
1286 | 70 | // Change the definition of CFA from SP+offset to FP+offset, because SP |
1287 | 70 | // will change at every alloca. |
1288 | 70 | unsigned Reg = MRI->getDwarfRegNum(FPReg, true); |
1289 | 70 | unsigned CFIIndex = MF.addFrameInst( |
1290 | 70 | MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); |
1291 | 70 | 
1292 | 70 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1293 | 70 | .addCFIIndex(CFIIndex); |
1294 | 70 | } |
1295 | 119 | } |
1296 | 1.46k | 
1297 | 1.46k | if (needsCFI) { |
1298 | 997 | // Describe where callee saved registers were saved, at fixed offsets from |
1299 | 997 | // CFA. |
1300 | 997 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
1301 | 1.97k | for (unsigned I = 0, E = CSI.size(); I != E; ++I976 ) { |
1302 | 976 | unsigned Reg = CSI[I].getReg(); |
1303 | 976 | if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue0 ; |
1304 | 976 | 
1305 | 976 | // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just |
1306 | 976 | // subregisters of CR2. We just need to emit a move of CR2. |
1307 | 976 | if (PPC::CRBITRCRegClass.contains(Reg)) |
1308 | 0 | continue; |
1309 | 976 | 
1310 | 976 | if ((Reg == PPC::X2 || Reg == PPC::R2974 ) && MustSaveTOC2 ) |
1311 | 0 | continue; |
1312 | 976 | 
1313 | 976 | // For SVR4, don't emit a move for the CR spill slot if we haven't |
1314 | 976 | // spilled CRs. |
1315 | 976 | if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) |
1316 | 976 | && !MustSaveCR29 ) |
1317 | 8 | continue; |
1318 | 968 | 
1319 | 968 | // For 64-bit SVR4 when we have spilled CRs, the spill location |
1320 | 968 | // is SP+8, not a frame-relative slot. |
1321 | 968 | if (isSVR4ABI && isPPC64 && (839 PPC::CR2 <= Reg839 && Reg <= PPC::CR4839 )) { |
1322 | 21 | // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for |
1323 | 21 | // the whole CR word. In the ELFv2 ABI, every CR that was |
1324 | 21 | // actually saved gets its own CFI record. |
1325 | 21 | unsigned CRReg = isELFv2ABI? Reg10 : (unsigned) PPC::CR211 ; |
1326 | 21 | unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( |
1327 | 21 | nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); |
1328 | 21 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1329 | 21 | .addCFIIndex(CFIIndex); |
1330 | 21 | continue; |
1331 | 21 | } |
1332 | 947 | 
1333 | 947 | if (CSI[I].isSpilledToReg()) { |
1334 | 3 | unsigned SpilledReg = CSI[I].getDstReg(); |
1335 | 3 | unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( |
1336 | 3 | nullptr, MRI->getDwarfRegNum(Reg, true), |
1337 | 3 | MRI->getDwarfRegNum(SpilledReg, true))); |
1338 | 3 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1339 | 3 | .addCFIIndex(CFIRegister); |
1340 | 944 | } else { |
1341 | 944 | int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); |
1342 | 944 | // We have changed the object offset above but we do not want to change |
1343 | 944 | // the actual offsets in the CFI instruction so we have to undo the |
1344 | 944 | // offset change here. |
1345 | 944 | if (MovingStackUpdateDown) |
1346 | 307 | Offset -= NegFrameSize; |
1347 | 944 | 
1348 | 944 | unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( |
1349 | 944 | nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); |
1350 | 944 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
1351 | 944 | .addCFIIndex(CFIIndex); |
1352 | 944 | } |
1353 | 947 | } |
1354 | 997 | } |
1355 | 1.46k | } |
1356 | | |
1357 | | void PPCFrameLowering::emitEpilogue(MachineFunction &MF, |
1358 | 11.3k | MachineBasicBlock &MBB) const { |
1359 | 11.3k | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
1360 | 11.3k | DebugLoc dl; |
1361 | 11.3k | |
1362 | 11.3k | if (MBBI != MBB.end()) |
1363 | 11.3k | dl = MBBI->getDebugLoc(); |
1364 | 11.3k | |
1365 | 11.3k | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1366 | 11.3k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1367 | 11.3k | |
1368 | 11.3k | // Get alignment info so we know how to restore the SP. |
1369 | 11.3k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1370 | 11.3k | |
1371 | 11.3k | // Get the number of bytes allocated from the FrameInfo. |
1372 | 11.3k | int FrameSize = MFI.getStackSize(); |
1373 | 11.3k | |
1374 | 11.3k | // Get processor type. |
1375 | 11.3k | bool isPPC64 = Subtarget.isPPC64(); |
1376 | 11.3k | // Get the ABI. |
1377 | 11.3k | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
1378 | 11.3k | |
1379 | 11.3k | // Check if the link register (LR) has been saved. |
1380 | 11.3k | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1381 | 11.3k | bool MustSaveLR = FI->mustSaveLR(); |
1382 | 11.3k | const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); |
1383 | 11.3k | bool MustSaveCR = !MustSaveCRs.empty(); |
1384 | 11.3k | // Do we have a frame pointer and/or base pointer for this function? |
1385 | 11.3k | bool HasFP = hasFP(MF); |
1386 | 11.3k | bool HasBP = RegInfo->hasBasePointer(MF); |
1387 | 11.3k | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI()1.05k ; |
1388 | 11.3k | |
1389 | 11.3k | unsigned SPReg = isPPC64 ? PPC::X110.3k : PPC::R11.05k ; |
1390 | 11.3k | unsigned BPReg = RegInfo->getBaseRegister(MF); |
1391 | 11.3k | unsigned FPReg = isPPC64 ? PPC::X3110.3k : PPC::R311.05k ; |
1392 | 11.3k | unsigned ScratchReg = 0; |
1393 | 11.3k | unsigned TempReg = isPPC64 ? PPC::X1210.3k : PPC::R121.05k ; // another scratch reg |
1394 | 11.3k | const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR810.3k |
1395 | 11.3k | : PPC::MTLR1.05k ); |
1396 | 11.3k | const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD10.3k |
1397 | 11.3k | : PPC::LWZ1.05k ); |
1398 | 11.3k | const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS810.3k |
1399 | 11.3k | : PPC::LIS1.05k ); |
1400 | 11.3k | const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR810.3k |
1401 | 11.3k | : PPC::OR1.05k ); |
1402 | 11.3k | const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI810.3k |
1403 | 11.3k | : PPC::ORI1.05k ); |
1404 | 11.3k | const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI810.3k |
1405 | 11.3k | : PPC::ADDI1.05k ); |
1406 | 11.3k | const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD810.3k |
1407 | 11.3k | : PPC::ADD41.05k ); |
1408 | 11.3k | |
1409 | 11.3k | int LROffset = getReturnSaveOffset(); |
1410 | 11.3k | |
1411 | 11.3k | int FPOffset = 0; |
1412 | 11.3k | |
1413 | 11.3k | // Using the same bool variable as below to suppress compiler warnings. |
1414 | 11.3k | bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, |
1415 | 11.3k | &TempReg); |
1416 | 11.3k | assert(SingleScratchReg && |
1417 | 11.3k | "Could not find an available scratch register"); |
1418 | 11.3k | |
1419 | 11.3k | SingleScratchReg = ScratchReg == TempReg; |
1420 | 11.3k | |
1421 | 11.3k | if (HasFP) { |
1422 | 113 | if (isSVR4ABI) { |
1423 | 113 | int FPIndex = FI->getFramePointerSaveIndex(); |
1424 | 113 | assert(FPIndex && "No Frame Pointer Save Slot!"); |
1425 | 113 | FPOffset = MFI.getObjectOffset(FPIndex); |
1426 | 113 | } else { |
1427 | 0 | FPOffset = getFramePointerSaveOffset(); |
1428 | 0 | } |
1429 | 113 | } |
1430 | 11.3k | |
1431 | 11.3k | int BPOffset = 0; |
1432 | 11.3k | if (HasBP) { |
1433 | 26 | if (isSVR4ABI) { |
1434 | 26 | int BPIndex = FI->getBasePointerSaveIndex(); |
1435 | 26 | assert(BPIndex && "No Base Pointer Save Slot!"); |
1436 | 26 | BPOffset = MFI.getObjectOffset(BPIndex); |
1437 | 26 | } else { |
1438 | 0 | BPOffset = getBasePointerSaveOffset(); |
1439 | 0 | } |
1440 | 26 | } |
1441 | 11.3k | |
1442 | 11.3k | int PBPOffset = 0; |
1443 | 11.3k | if (FI->usesPICBase()) { |
1444 | 25 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
1445 | 25 | assert(PBPIndex && "No PIC Base Pointer Save Slot!"); |
1446 | 25 | PBPOffset = MFI.getObjectOffset(PBPIndex); |
1447 | 25 | } |
1448 | 11.3k | |
1449 | 11.3k | bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()11.3k ); |
1450 | 11.3k | |
1451 | 11.3k | if (IsReturnBlock) { |
1452 | 11.3k | unsigned RetOpcode = MBBI->getOpcode(); |
1453 | 11.3k | bool UsesTCRet = RetOpcode == PPC::TCRETURNri || |
1454 | 11.3k | RetOpcode == PPC::TCRETURNdi || |
1455 | 11.3k | RetOpcode == PPC::TCRETURNai11.3k || |
1456 | 11.3k | RetOpcode == PPC::TCRETURNri811.3k || |
1457 | 11.3k | RetOpcode == PPC::TCRETURNdi811.3k || |
1458 | 11.3k | RetOpcode == PPC::TCRETURNai811.2k ; |
1459 | 11.3k | |
1460 | 11.3k | if (UsesTCRet) { |
1461 | 60 | int MaxTCRetDelta = FI->getTailCallSPDelta(); |
1462 | 60 | MachineOperand &StackAdjust = MBBI->getOperand(1); |
1463 | 60 | assert(StackAdjust.isImm() && "Expecting immediate value."); |
1464 | 60 | // Adjust stack pointer. |
1465 | 60 | int StackAdj = StackAdjust.getImm(); |
1466 | 60 | int Delta = StackAdj - MaxTCRetDelta; |
1467 | 60 | assert((Delta >= 0) && "Delta must be positive"); |
1468 | 60 | if (MaxTCRetDelta>0) |
1469 | 0 | FrameSize += (StackAdj +Delta); |
1470 | 60 | else |
1471 | 60 | FrameSize += StackAdj; |
1472 | 60 | } |
1473 | 11.3k | } |
1474 | 11.3k | |
1475 | 11.3k | // Frames of 32KB & larger require special handling because they cannot be |
1476 | 11.3k | // indexed into with a simple LD/LWZ immediate offset operand. |
1477 | 11.3k | bool isLargeFrame = !isInt<16>(FrameSize); |
1478 | 11.3k | |
1479 | 11.3k | // On targets without red zone, the SP needs to be restored last, so that |
1480 | 11.3k | // all live contents of the stack frame are upwards of the SP. This means |
1481 | 11.3k | // that we cannot restore SP just now, since there may be more registers |
1482 | 11.3k | // to restore from the stack frame (e.g. R31). If the frame size is not |
1483 | 11.3k | // a simple immediate value, we will need a spare register to hold the |
1484 | 11.3k | // restored SP. If the frame size is known and small, we can simply adjust |
1485 | 11.3k | // the offsets of the registers to be restored, and still use SP to restore |
1486 | 11.3k | // them. In such case, the final update of SP will be to add the frame |
1487 | 11.3k | // size to it. |
1488 | 11.3k | // To simplify the code, set RBReg to the base register used to restore |
1489 | 11.3k | // values from the stack, and set SPAdd to the value that needs to be added |
1490 | 11.3k | // to the SP at the end. The default values are as if red zone was present. |
1491 | 11.3k | unsigned RBReg = SPReg; |
1492 | 11.3k | unsigned SPAdd = 0; |
1493 | 11.3k | |
1494 | 11.3k | // Check if we can move the stack update instruction up the epilogue |
1495 | 11.3k | // past the callee saves. This will allow the move to LR instruction |
1496 | 11.3k | // to be executed before the restores of the callee saves which means |
1497 | 11.3k | // that the callee saves can hide the latency from the MTLR instruction. |
1498 | 11.3k | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
1499 | 11.3k | if (stackUpdateCanBeMoved(MF)) { |
1500 | 501 | const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); |
1501 | 501 | for (CalleeSavedInfo CSI : Info) { |
1502 | 338 | int FrIdx = CSI.getFrameIdx(); |
1503 | 338 | // If the frame index is not negative the callee saved info belongs to a |
1504 | 338 | // stack object that is not a fixed stack object. We ignore non-fixed |
1505 | 338 | // stack objects because we won't move the update of the stack pointer |
1506 | 338 | // past them. |
1507 | 338 | if (FrIdx >= 0) |
1508 | 10 | continue; |
1509 | 328 | |
1510 | 328 | if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) |
1511 | 328 | StackUpdateLoc--; |
1512 | 0 | else { |
1513 | 0 | // Abort the operation as we can't update all CSR restores. |
1514 | 0 | StackUpdateLoc = MBBI; |
1515 | 0 | break; |
1516 | 0 | } |
1517 | 328 | } |
1518 | 501 | } |
1519 | 11.3k | |
1520 | 11.3k | if (FrameSize) { |
1521 | 1.46k | // In the prologue, the loaded (or persistent) stack pointer value is |
1522 | 1.46k | // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red |
1523 | 1.46k | // zone add this offset back now. |
1524 | 1.46k | |
1525 | 1.46k | // If this function contained a fastcc call and GuaranteedTailCallOpt is |
1526 | 1.46k | // enabled (=> hasFastCall()==true) the fastcc call might contain a tail |
1527 | 1.46k | // call which invalidates the stack pointer value in SP(0). So we use the |
1528 | 1.46k | // value of R31 in this case. |
1529 | 1.46k | if (FI->hasFastCall()) { |
1530 | 3 | assert(HasFP && "Expecting a valid frame pointer."); |
1531 | 3 | if (!HasRedZone) |
1532 | 2 | RBReg = FPReg; |
1533 | 3 | if (!isLargeFrame) { |
1534 | 3 | BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) |
1535 | 3 | .addReg(FPReg).addImm(FrameSize); |
1536 | 3 | } else { |
1537 | 0 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
1538 | 0 | .addImm(FrameSize >> 16); |
1539 | 0 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
1540 | 0 | .addReg(ScratchReg, RegState::Kill) |
1541 | 0 | .addImm(FrameSize & 0xFFFF); |
1542 | 0 | BuildMI(MBB, MBBI, dl, AddInst) |
1543 | 0 | .addReg(RBReg) |
1544 | 0 | .addReg(FPReg) |
1545 | 0 | .addReg(ScratchReg); |
1546 | 0 | } |
1547 | 1.45k | } else if (!isLargeFrame && !HasBP1.43k && !MFI.hasVarSizedObjects()1.41k ) { |
1548 | 1.40k | if (HasRedZone) { |
1549 | 1.01k | BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) |
1550 | 1.01k | .addReg(SPReg) |
1551 | 1.01k | .addImm(FrameSize); |
1552 | 1.01k | } else { |
1553 | 381 | // Make sure that adding FrameSize will not overflow the max offset |
1554 | 381 | // size. |
1555 | 381 | assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && |
1556 | 381 | "Local offsets should be negative"); |
1557 | 381 | SPAdd = FrameSize; |
1558 | 381 | FPOffset += FrameSize; |
1559 | 381 | BPOffset += FrameSize; |
1560 | 381 | PBPOffset += FrameSize; |
1561 | 381 | } |
1562 | 1.40k | } else { |
1563 | 58 | // We don't want to use ScratchReg as a base register, because it |
1564 | 58 | // could happen to be R0. Use FP instead, but make sure to preserve it. |
1565 | 58 | if (!HasRedZone) { |
1566 | 26 | // If FP is not saved, copy it to ScratchReg. |
1567 | 26 | if (!HasFP) |
1568 | 9 | BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) |
1569 | 9 | .addReg(FPReg) |
1570 | 9 | .addReg(FPReg); |
1571 | 26 | RBReg = FPReg; |
1572 | 26 | } |
1573 | 58 | BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) |
1574 | 58 | .addImm(0) |
1575 | 58 | .addReg(SPReg); |
1576 | 58 | } |
1577 | 1.46k | } |
1578 | 11.3k | assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); |
1579 | 11.3k | // If there is no red zone, ScratchReg may be needed for holding a useful |
1580 | 11.3k | // value (although not the base register). Make sure it is not overwritten |
1581 | 11.3k | // too early. |
1582 | 11.3k | |
1583 | 11.3k | assert((isPPC64 || !MustSaveCR) && |
1584 | 11.3k | "Epilogue CR restoring supported only in 64-bit mode"); |
1585 | 11.3k | |
1586 | 11.3k | // If we need to restore both the LR and the CR and we only have one |
1587 | 11.3k | // available scratch register, we must do them one at a time. |
1588 | 11.3k | if (MustSaveCR && SingleScratchReg32 && MustSaveLR0 ) { |
1589 | 0 | // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg |
1590 | 0 | // is live here. |
1591 | 0 | assert(HasRedZone && "Expecting red zone"); |
1592 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) |
1593 | 0 | .addImm(8) |
1594 | 0 | .addReg(SPReg); |
1595 | 0 | for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) |
1596 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) |
1597 | 0 | .addReg(TempReg, getKillRegState(i == e-1)); |
1598 | 0 | } |
1599 | 11.3k | |
1600 | 11.3k | // Delay restoring of the LR if ScratchReg is needed. This is ok, since |
1601 | 11.3k | // LR is stored in the caller's stack frame. ScratchReg will be needed |
1602 | 11.3k | // if RBReg is anything other than SP. We shouldn't use ScratchReg as |
1603 | 11.3k | // a base register anyway, because it may happen to be R0. |
1604 | 11.3k | bool LoadedLR = false; |
1605 | 11.3k | if (MustSaveLR && RBReg == SPReg1.27k && isInt<16>(LROffset+SPAdd)1.26k ) { |
1606 | 1.26k | BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) |
1607 | 1.26k | .addImm(LROffset+SPAdd) |
1608 | 1.26k | .addReg(RBReg); |
1609 | 1.26k | LoadedLR = true; |
1610 | 1.26k | } |
1611 | 11.3k | |
1612 | 11.3k | if (MustSaveCR && !(32 SingleScratchReg32 && MustSaveLR0 )) { |
1613 | 32 | // This will only occur for PPC64. |
1614 | 32 | assert(isPPC64 && "Expecting 64-bit mode"); |
1615 | 32 | assert(RBReg == SPReg && "Should be using SP as a base register"); |
1616 | 32 | BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) |
1617 | 32 | .addImm(8) |
1618 | 32 | .addReg(RBReg); |
1619 | 32 | } |
1620 | 11.3k | |
1621 | 11.3k | if (HasFP) { |
1622 | 113 | // If there is red zone, restore FP directly, since SP has already been |
1623 | 113 | // restored. Otherwise, restore the value of FP into ScratchReg. |
1624 | 113 | if (HasRedZone || RBReg == SPReg37 ) |
1625 | 94 | BuildMI(MBB, MBBI, dl, LoadInst, FPReg) |
1626 | 94 | .addImm(FPOffset) |
1627 | 94 | .addReg(SPReg); |
1628 | 19 | else |
1629 | 19 | BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) |
1630 | 19 | .addImm(FPOffset) |
1631 | 19 | .addReg(RBReg); |
1632 | 113 | } |
1633 | 11.3k | |
1634 | 11.3k | if (FI->usesPICBase()) |
1635 | 25 | BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) |
1636 | 25 | .addImm(PBPOffset) |
1637 | 25 | .addReg(RBReg); |
1638 | 11.3k | |
1639 | 11.3k | if (HasBP) |
1640 | 26 | BuildMI(MBB, MBBI, dl, LoadInst, BPReg) |
1641 | 26 | .addImm(BPOffset) |
1642 | 26 | .addReg(RBReg); |
1643 | 11.3k | |
1644 | 11.3k | // There is nothing more to be loaded from the stack, so now we can |
1645 | 11.3k | // restore SP: SP = RBReg + SPAdd. |
1646 | 11.3k | if (RBReg != SPReg || SPAdd != 011.3k ) { |
1647 | 409 | assert(!HasRedZone && "This should not happen with red zone"); |
1648 | 409 | // If SPAdd is 0, generate a copy. |
1649 | 409 | if (SPAdd == 0) |
1650 | 28 | BuildMI(MBB, MBBI, dl, OrInst, SPReg) |
1651 | 28 | .addReg(RBReg) |
1652 | 28 | .addReg(RBReg); |
1653 | 381 | else |
1654 | 381 | BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) |
1655 | 381 | .addReg(RBReg) |
1656 | 381 | .addImm(SPAdd); |
1657 | 409 | |
1658 | 409 | assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); |
1659 | 409 | if (RBReg == FPReg) |
1660 | 28 | BuildMI(MBB, MBBI, dl, OrInst, FPReg) |
1661 | 28 | .addReg(ScratchReg) |
1662 | 28 | .addReg(ScratchReg); |
1663 | 409 | |
1664 | 409 | // Now load the LR from the caller's stack frame. |
1665 | 409 | if (MustSaveLR && !LoadedLR266 ) |
1666 | 19 | BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) |
1667 | 19 | .addImm(LROffset) |
1668 | 19 | .addReg(SPReg); |
1669 | 409 | } |
1670 | 11.3k | |
1671 | 11.3k | if (MustSaveCR && |
1672 | 11.3k | !(32 SingleScratchReg32 && MustSaveLR0 )) // will only occur for PPC64 |
1673 | 112 | for (unsigned i = 0, e = MustSaveCRs.size(); 32 i != e; ++i80 ) |
1674 | 80 | BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) |
1675 | 80 | .addReg(TempReg, getKillRegState(i == e-1)); |
1676 | 11.3k | |
1677 | 11.3k | if (MustSaveLR) |
1678 | 1.27k | BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); |
1679 | 11.3k | |
1680 | 11.3k | // Callee pop calling convention. Pop parameter/linkage area. Used for tail |
1681 | 11.3k | // call optimization. |
1682 | 11.3k | if (IsReturnBlock) { |
1683 | 11.3k | unsigned RetOpcode = MBBI->getOpcode(); |
1684 | 11.3k | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
1685 | 11.3k | (6 RetOpcode == PPC::BLR6 || RetOpcode == PPC::BLR84 ) && |
1686 | 11.3k | MF.getFunction().getCallingConv() == CallingConv::Fast3 ) { |
1687 | 3 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1688 | 3 | unsigned CallerAllocatedAmt = FI->getMinReservedArea(); |
1689 | 3 | |
1690 | 3 | if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { |
1691 | 3 | BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) |
1692 | 3 | .addReg(SPReg).addImm(CallerAllocatedAmt); |
1693 | 3 | } else { |
1694 | 0 | BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) |
1695 | 0 | .addImm(CallerAllocatedAmt >> 16); |
1696 | 0 | BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) |
1697 | 0 | .addReg(ScratchReg, RegState::Kill) |
1698 | 0 | .addImm(CallerAllocatedAmt & 0xFFFF); |
1699 | 0 | BuildMI(MBB, MBBI, dl, AddInst) |
1700 | 0 | .addReg(SPReg) |
1701 | 0 | .addReg(FPReg) |
1702 | 0 | .addReg(ScratchReg); |
1703 | 0 | } |
1704 | 11.3k | } else { |
1705 | 11.3k | createTailCallBranchInstr(MBB); |
1706 | 11.3k | } |
1707 | 11.3k | } |
1708 | 11.3k | } |
1709 | | |
1710 | 11.3k | void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { |
1711 | 11.3k | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
1712 | 11.3k | |
1713 | 11.3k | // If we got this far a first terminator should exist. |
1714 | 11.3k | assert(MBBI != MBB.end() && "Failed to find the first terminator."); |
1715 | 11.3k | |
1716 | 11.3k | DebugLoc dl = MBBI->getDebugLoc(); |
1717 | 11.3k | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1718 | 11.3k | |
1719 | 11.3k | // Create branch instruction for pseudo tail call return instruction |
1720 | 11.3k | unsigned RetOpcode = MBBI->getOpcode(); |
1721 | 11.3k | if (RetOpcode == PPC::TCRETURNdi) { |
1722 | 2 | MBBI = MBB.getLastNonDebugInstr(); |
1723 | 2 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1724 | 2 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). |
1725 | 2 | addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); |
1726 | 11.3k | } else if (RetOpcode == PPC::TCRETURNri) { |
1727 | 0 | MBBI = MBB.getLastNonDebugInstr(); |
1728 | 0 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); |
1729 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); |
1730 | 11.3k | } else if (RetOpcode == PPC::TCRETURNai) { |
1731 | 0 | MBBI = MBB.getLastNonDebugInstr(); |
1732 | 0 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1733 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); |
1734 | 11.3k | } else if (RetOpcode == PPC::TCRETURNdi8) { |
1735 | 58 | MBBI = MBB.getLastNonDebugInstr(); |
1736 | 58 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1737 | 58 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). |
1738 | 58 | addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); |
1739 | 11.2k | } else if (RetOpcode == PPC::TCRETURNri8) { |
1740 | 0 | MBBI = MBB.getLastNonDebugInstr(); |
1741 | 0 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); |
1742 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); |
1743 | 11.2k | } else if (RetOpcode == PPC::TCRETURNai8) { |
1744 | 0 | MBBI = MBB.getLastNonDebugInstr(); |
1745 | 0 | MachineOperand &JumpTarget = MBBI->getOperand(0); |
1746 | 0 | BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); |
1747 | 0 | } |
1748 | 11.3k | } |
1749 | | |
1750 | | void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, |
1751 | | BitVector &SavedRegs, |
1752 | 11.3k | RegScavenger *RS) const { |
1753 | 11.3k | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1754 | 11.3k | |
1755 | 11.3k | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1756 | 11.3k | |
1757 | 11.3k | // Save and clear the LR state. |
1758 | 11.3k | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
1759 | 11.3k | unsigned LR = RegInfo->getRARegister(); |
1760 | 11.3k | FI->setMustSaveLR(MustSaveLR(MF, LR)); |
1761 | 11.3k | SavedRegs.reset(LR); |
1762 | 11.3k | |
1763 | 11.3k | // Save R31 if necessary |
1764 | 11.3k | int FPSI = FI->getFramePointerSaveIndex(); |
1765 | 11.3k | bool isPPC64 = Subtarget.isPPC64(); |
1766 | 11.3k | bool isDarwinABI = Subtarget.isDarwinABI(); |
1767 | 11.3k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1768 | 11.3k | |
1769 | 11.3k | // Create the frame pointer save slot if its index hasn't been defined yet. |
1770 | 11.3k | if (!FPSI && needsFP(MF)11.3k ) { |
1771 | 135 | // Find out the fixed offset of the frame pointer save area. |
1772 | 135 | int FPOffset = getFramePointerSaveOffset(); |
1773 | 135 | // Allocate the frame index for frame pointer save area. |
1774 | 135 | FPSI = MFI.CreateFixedObject(isPPC64? 8104 : 431 , FPOffset, true); |
1775 | 135 | // Save the result. |
1776 | 135 | FI->setFramePointerSaveIndex(FPSI); |
1777 | 135 | } |
1778 | 11.3k | |
1779 | 11.3k | int BPSI = FI->getBasePointerSaveIndex(); |
1780 | 11.3k | if (!BPSI && RegInfo->hasBasePointer(MF)) { |
1781 | 26 | int BPOffset = getBasePointerSaveOffset(); |
1782 | 26 | // Allocate the frame index for the base pointer save area. |
1783 | 26 | BPSI = MFI.CreateFixedObject(isPPC64? 813 : 413 , BPOffset, true); |
1784 | 26 | // Save the result. |
1785 | 26 | FI->setBasePointerSaveIndex(BPSI); |
1786 | 26 | } |
1787 | 11.3k | |
1788 | 11.3k | // Reserve stack space for the PIC Base register (R30). |
1789 | 11.3k | // Only used in SVR4 32-bit. |
1790 | 11.3k | if (FI->usesPICBase()) { |
1791 | 23 | int PBPSI = MFI.CreateFixedObject(4, -8, true); |
1792 | 23 | FI->setPICBasePointerSaveIndex(PBPSI); |
1793 | 23 | } |
1794 | 11.3k | |
1795 | 11.3k | // Make sure we don't explicitly spill r31, because, for example, we have |
1796 | 11.3k | // some inline asm which explicitly clobbers it, when we otherwise have a |
1797 | 11.3k | // frame pointer and are using r31's spill slot for the prologue/epilogue |
1798 | 11.3k | // code. Same goes for the base pointer and the PIC base register. |
1799 | 11.3k | if (needsFP(MF)) |
1800 | 158 | SavedRegs.reset(isPPC64 ? PPC::X31119 : PPC::R3139 ); |
1801 | 11.3k | if (RegInfo->hasBasePointer(MF)) |
1802 | 26 | SavedRegs.reset(RegInfo->getBaseRegister(MF)); |
1803 | 11.3k | if (FI->usesPICBase()) |
1804 | 23 | SavedRegs.reset(PPC::R30); |
1805 | 11.3k | |
1806 | 11.3k | // Reserve stack space to move the linkage area to in case of a tail call. |
1807 | 11.3k | int TCSPDelta = 0; |
1808 | 11.3k | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
1809 | 11.3k | (TCSPDelta = FI->getTailCallSPDelta()) < 07 ) { |
1810 | 0 | MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); |
1811 | 0 | } |
1812 | 11.3k | |
1813 | 11.3k | // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the |
1814 | 11.3k | // function uses CR 2, 3, or 4. |
1815 | 11.3k | if (!isPPC64 && !isDarwinABI1.04k && |
1816 | 11.3k | (1.04k SavedRegs.test(PPC::CR2)1.04k || |
1817 | 1.04k | SavedRegs.test(PPC::CR3)1.03k || |
1818 | 1.04k | SavedRegs.test(PPC::CR4)1.03k )) { |
1819 | 11 | int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true); |
1820 | 11 | FI->setCRSpillFrameIndex(FrameIdx); |
1821 | 11 | } |
1822 | 11.3k | } |
1823 | | |
1824 | | void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, |
1825 | 11.2k | RegScavenger *RS) const { |
1826 | 11.2k | // Early exit if not using the SVR4 ABI. |
1827 | 11.2k | if (!Subtarget.isSVR4ABI()) { |
1828 | 0 | addScavengingSpillSlot(MF, RS); |
1829 | 0 | return; |
1830 | 0 | } |
1831 | 11.2k | |
1832 | 11.2k | // Get callee saved register information. |
1833 | 11.2k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1834 | 11.2k | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
1835 | 11.2k | |
1836 | 11.2k | // If the function is shrink-wrapped and has a tail call, the tail call |
1837 | 11.2k | // might not be in the new RestoreBlock, so the real branch instruction |
1838 | 11.2k | // won't be generated by emitEpilogue(), because shrink-wrap has chosen a |
1839 | 11.2k | // new RestoreBlock. So we handle this case here. |
1840 | 11.2k | if (MFI.getSavePoint() && MFI.hasTailCall()94 ) { |
1841 | 0 | MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); |
1842 | 0 | for (MachineBasicBlock &MBB : MF) { |
1843 | 0 | if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) |
1844 | 0 | createTailCallBranchInstr(MBB); |
1845 | 0 | } |
1846 | 0 | } |
1847 | 11.2k | |
1848 | 11.2k | // Early exit if no callee saved registers are modified! |
1849 | 11.2k | if (CSI.empty() && !needsFP(MF)10.8k ) { |
1850 | 10.7k | addScavengingSpillSlot(MF, RS); |
1851 | 10.7k | return; |
1852 | 10.7k | } |
1853 | 546 | |
1854 | 546 | unsigned MinGPR = PPC::R31; |
1855 | 546 | unsigned MinG8R = PPC::X31; |
1856 | 546 | unsigned MinFPR = PPC::F31; |
1857 | 546 | unsigned MinVR = Subtarget.hasSPE() ? PPC::S311 : PPC::V31545 ; |
1858 | 546 | |
1859 | 546 | bool HasGPSaveArea = false; |
1860 | 546 | bool HasG8SaveArea = false; |
1861 | 546 | bool HasFPSaveArea = false; |
1862 | 546 | bool HasVRSAVESaveArea = false; |
1863 | 546 | bool HasVRSaveArea = false; |
1864 | 546 | |
1865 | 546 | SmallVector<CalleeSavedInfo, 18> GPRegs; |
1866 | 546 | SmallVector<CalleeSavedInfo, 18> G8Regs; |
1867 | 546 | SmallVector<CalleeSavedInfo, 18> FPRegs; |
1868 | 546 | SmallVector<CalleeSavedInfo, 18> VRegs; |
1869 | 546 | |
1870 | 3.04k | for (unsigned i = 0, e = CSI.size(); i != e; ++i2.49k ) { |
1871 | 2.49k | unsigned Reg = CSI[i].getReg(); |
1872 | 2.49k | assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || |
1873 | 2.49k | (Reg != PPC::X2 && Reg != PPC::R2)) && |
1874 | 2.49k | "Not expecting to try to spill R2 in a function that must save TOC"); |
1875 | 2.49k | if (PPC::GPRCRegClass.contains(Reg) || |
1876 | 2.49k | PPC::SPE4RCRegClass.contains(Reg)2.29k ) { |
1877 | 208 | HasGPSaveArea = true; |
1878 | 208 | |
1879 | 208 | GPRegs.push_back(CSI[i]); |
1880 | 208 | |
1881 | 208 | if (Reg < MinGPR) { |
1882 | 51 | MinGPR = Reg; |
1883 | 51 | } |
1884 | 2.29k | } else if (PPC::G8RCRegClass.contains(Reg)) { |
1885 | 1.14k | HasG8SaveArea = true; |
1886 | 1.14k | |
1887 | 1.14k | G8Regs.push_back(CSI[i]); |
1888 | 1.14k | |
1889 | 1.14k | if (Reg < MinG8R) { |
1890 | 233 | MinG8R = Reg; |
1891 | 233 | } |
1892 | 1.14k | } else if (PPC::F8RCRegClass.contains(Reg)) { |
1893 | 810 | HasFPSaveArea = true; |
1894 | 810 | |
1895 | 810 | FPRegs.push_back(CSI[i]); |
1896 | 810 | |
1897 | 810 | if (Reg < MinFPR) { |
1898 | 104 | MinFPR = Reg; |
1899 | 104 | } |
1900 | 810 | } else if (336 PPC::CRBITRCRegClass.contains(Reg)336 || |
1901 | 336 | PPC::CRRCRegClass.contains(Reg)) { |
1902 | 99 | ; // do nothing, as we already know whether CRs are spilled |
1903 | 237 | } else if (PPC::VRSAVERCRegClass.contains(Reg)) { |
1904 | 0 | HasVRSAVESaveArea = true; |
1905 | 237 | } else if (PPC::VRRCRegClass.contains(Reg) || |
1906 | 237 | PPC::SPERCRegClass.contains(Reg)18 ) { |
1907 | 237 | // Altivec and SPE are mutually exclusive, but have the same stack |
1908 | 237 | // alignment requirements, so overload the save area for both cases. |
1909 | 237 | HasVRSaveArea = true; |
1910 | 237 | |
1911 | 237 | VRegs.push_back(CSI[i]); |
1912 | 237 | |
1913 | 237 | if (Reg < MinVR) { |
1914 | 25 | MinVR = Reg; |
1915 | 25 | } |
1916 | 237 | } else { |
1917 | 0 | llvm_unreachable("Unknown RegisterClass!"); |
1918 | 0 | } |
1919 | 2.49k | } |
1920 | 546 | |
1921 | 546 | PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); |
1922 | 546 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
1923 | 546 | |
1924 | 546 | int64_t LowerBound = 0; |
1925 | 546 | |
1926 | 546 | // Take into account stack space reserved for tail calls. |
1927 | 546 | int TCSPDelta = 0; |
1928 | 546 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
1929 | 546 | (TCSPDelta = PFI->getTailCallSPDelta()) < 03 ) { |
1930 | 0 | LowerBound = TCSPDelta; |
1931 | 0 | } |
1932 | 546 | |
1933 | 546 | // The Floating-point register save area is right below the back chain word |
1934 | 546 | // of the previous stack frame. |
1935 | 546 | if (HasFPSaveArea) { |
1936 | 951 | for (unsigned i = 0, e = FPRegs.size(); i != e; ++i810 ) { |
1937 | 810 | int FI = FPRegs[i].getFrameIdx(); |
1938 | 810 | |
1939 | 810 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1940 | 810 | } |
1941 | 141 | |
1942 | 141 | LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; |
1943 | 141 | } |
1944 | 546 | |
1945 | 546 | // Check whether the frame pointer register is allocated. If so, make sure it |
1946 | 546 | // is spilled to the correct offset. |
1947 | 546 | if (needsFP(MF)) { |
1948 | 154 | int FI = PFI->getFramePointerSaveIndex(); |
1949 | 154 | assert(FI && "No Frame Pointer Save Slot!"); |
1950 | 154 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1951 | 154 | // FP is R31/X31, so no need to update MinGPR/MinG8R. |
1952 | 154 | HasGPSaveArea = true; |
1953 | 154 | } |
1954 | 546 | |
1955 | 546 | if (PFI->usesPICBase()) { |
1956 | 11 | int FI = PFI->getPICBasePointerSaveIndex(); |
1957 | 11 | assert(FI && "No PIC Base Pointer Save Slot!"); |
1958 | 11 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1959 | 11 | |
1960 | 11 | MinGPR = std::min<unsigned>(MinGPR, PPC::R30); |
1961 | 11 | HasGPSaveArea = true; |
1962 | 11 | } |
1963 | 546 | |
1964 | 546 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1965 | 546 | if (RegInfo->hasBasePointer(MF)) { |
1966 | 21 | int FI = PFI->getBasePointerSaveIndex(); |
1967 | 21 | assert(FI && "No Base Pointer Save Slot!"); |
1968 | 21 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1969 | 21 | |
1970 | 21 | unsigned BP = RegInfo->getBaseRegister(MF); |
1971 | 21 | if (PPC::G8RCRegClass.contains(BP)) { |
1972 | 9 | MinG8R = std::min<unsigned>(MinG8R, BP); |
1973 | 9 | HasG8SaveArea = true; |
1974 | 12 | } else if (PPC::GPRCRegClass.contains(BP)) { |
1975 | 12 | MinGPR = std::min<unsigned>(MinGPR, BP); |
1976 | 12 | HasGPSaveArea = true; |
1977 | 12 | } |
1978 | 21 | } |
1979 | 546 | |
1980 | 546 | // General register save area starts right below the Floating-point |
1981 | 546 | // register save area. |
1982 | 546 | if (HasGPSaveArea || HasG8SaveArea345 ) { |
1983 | 398 | // Move general register save area spill slots down, taking into account |
1984 | 398 | // the size of the Floating-point register save area. |
1985 | 606 | for (unsigned i = 0, e = GPRegs.size(); i != e; ++i208 ) { |
1986 | 208 | if (!GPRegs[i].isSpilledToReg()) { |
1987 | 208 | int FI = GPRegs[i].getFrameIdx(); |
1988 | 208 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1989 | 208 | } |
1990 | 208 | } |
1991 | 398 | |
1992 | 398 | // Likewise move the G8 register spill slots down, taking into account |
1993 | 398 | // the size of the Floating-point register save area. |
1994 | 1.54k | for (unsigned i = 0, e = G8Regs.size(); i != e; ++i1.14k ) { |
1995 | 1.14k | if (!G8Regs[i].isSpilledToReg()) { |
1996 | 1.13k | int FI = G8Regs[i].getFrameIdx(); |
1997 | 1.13k | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
1998 | 1.13k | } |
1999 | 1.14k | } |
2000 | 398 | |
2001 | 398 | unsigned MinReg = |
2002 | 398 | std::min<unsigned>(TRI->getEncodingValue(MinGPR), |
2003 | 398 | TRI->getEncodingValue(MinG8R)); |
2004 | 398 | |
2005 | 398 | if (Subtarget.isPPC64()) { |
2006 | 313 | LowerBound -= (31 - MinReg + 1) * 8; |
2007 | 313 | } else { |
2008 | 85 | LowerBound -= (31 - MinReg + 1) * 4; |
2009 | 85 | } |
2010 | 398 | } |
2011 | 546 | |
2012 | 546 | // For 32-bit only, the CR save area is below the general register |
2013 | 546 | // save area. For 64-bit SVR4, the CR save area is addressed relative |
2014 | 546 | // to the stack pointer and hence does not need an adjustment here. |
2015 | 546 | // Only CR2 (the first nonvolatile spilled) has an associated frame |
2016 | 546 | // index so that we have a single uniform save area. |
2017 | 546 | if (spillsCR(MF) && !(18 Subtarget.isPPC64()18 && Subtarget.isSVR4ABI()7 )) { |
2018 | 11 | // Adjust the offset of the CR spill slot. |
2019 | 86 | for (unsigned i = 0, e = CSI.size(); i != e; ++i75 ) { |
2020 | 75 | unsigned Reg = CSI[i].getReg(); |
2021 | 75 | |
2022 | 75 | if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) |
2023 | 75 | // Leave Darwin logic as-is. |
2024 | 75 | || (64 !Subtarget.isSVR4ABI()64 && |
2025 | 64 | (0 PPC::CRBITRCRegClass.contains(Reg)0 || |
2026 | 11 | PPC::CRRCRegClass.contains(Reg)0 ))) { |
2027 | 11 | int FI = CSI[i].getFrameIdx(); |
2028 | 11 | |
2029 | 11 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2030 | 11 | } |
2031 | 75 | } |
2032 | 11 | |
2033 | 11 | LowerBound -= 4; // The CR save area is always 4 bytes long. |
2034 | 11 | } |
2035 | 546 | |
2036 | 546 | if (HasVRSAVESaveArea) { |
2037 | 0 | // FIXME SVR4: Is it actually possible to have multiple elements in CSI |
2038 | 0 | // which have the VRSAVE register class? |
2039 | 0 | // Adjust the frame index of the VRSAVE spill slot. |
2040 | 0 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { |
2041 | 0 | unsigned Reg = CSI[i].getReg(); |
2042 | 0 |
|
2043 | 0 | if (PPC::VRSAVERCRegClass.contains(Reg)) { |
2044 | 0 | int FI = CSI[i].getFrameIdx(); |
2045 | 0 |
|
2046 | 0 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2047 | 0 | } |
2048 | 0 | } |
2049 | 0 |
|
2050 | 0 | LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. |
2051 | 0 | } |
2052 | 546 | |
2053 | 546 | // Both Altivec and SPE have the same alignment and padding requirements |
2054 | 546 | // within the stack frame. |
2055 | 546 | if (HasVRSaveArea) { |
2056 | 71 | // Insert alignment padding, we need 16-byte alignment. Note: for positive |
2057 | 71 | // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since |
2058 | 71 | // we are using negative number here (the stack grows downward). We should |
2059 | 71 | // use formula : y = x & (~(n-1)). Where x is the size before aligning, n |
2060 | 71 | // is the alignment size ( n = 16 here) and y is the size after aligning. |
2061 | 71 | assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); |
2062 | 71 | LowerBound &= ~(15); |
2063 | 71 | |
2064 | 308 | for (unsigned i = 0, e = VRegs.size(); i != e; ++i237 ) { |
2065 | 237 | int FI = VRegs[i].getFrameIdx(); |
2066 | 237 | |
2067 | 237 | MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); |
2068 | 237 | } |
2069 | 71 | } |
2070 | 546 | |
2071 | 546 | addScavengingSpillSlot(MF, RS); |
2072 | 546 | } |
2073 | | |
// Reserve emergency spill slot(s) for the register scavenger.
//
// MF: function whose frame is being laid out.
// RS: scavenger that receives the new frame indices. It is dereferenced
//     without a null check inside the branch below, so callers presumably
//     always pass a valid scavenger - TODO confirm.
//
// One slot is created when the function has variable-sized objects, spills
// CR or VRSAVE, has non-RI spills, or has spills while the stack size from
// determineFrameLayout does not fit a signed 16-bit value. A second slot is
// added for CR/VRSAVE spills or over-aligned variable-sized objects.
2074 | | void |
2075 | | PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, |
2076 | 11.2k | RegScavenger *RS) const { |
2077 | 11.2k | // Reserve a slot closest to SP or frame pointer if we have a dynalloc or |
2078 | 11.2k | // a large stack, which will require scavenging a register to materialize a |
2079 | 11.2k | // large offset. |
2080 | 11.2k | |
2081 | 11.2k | // We need to have a scavenger spill slot for spills if the frame size is |
2082 | 11.2k | // large. In case there is no free register for large-offset addressing, |
2083 | 11.2k | // this slot is used for the necessary emergency spill. Also, we need the |
2084 | 11.2k | // slot for dynamic stack allocations. |
2085 | 11.2k | |
2086 | 11.2k | // The scavenger might be invoked if the frame offset does not fit into |
2087 | 11.2k | // the 16-bit immediate. We don't know the complete frame size here |
2088 | 11.2k | // because we've not yet computed callee-saved register spills or the |
2089 | 11.2k | // needed alignment padding. |
// NOTE(review): the meaning of the second argument (true) is defined by
// determineFrameLayout, which is outside this block - confirm there.
2090 | 11.2k | unsigned StackSize = determineFrameLayout(MF, true); |
2091 | 11.2k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2092 | 11.2k | if (MFI.hasVarSizedObjects() || spillsCR(MF)11.2k || spillsVRSAVE(MF)11.2k || |
2093 | 11.2k | hasNonRISpills(MF)11.2k || (11.1k hasSpills(MF)11.1k && !isInt<16>(StackSize)406 )) { |
// The slot is sized and aligned for one general-purpose register of the
// native width: G8RC on PPC64, GPRC otherwise.
2094 | 153 | const TargetRegisterClass &GPRC = PPC::GPRCRegClass; |
2095 | 153 | const TargetRegisterClass &G8RC = PPC::G8RCRegClass; |
2096 | 153 | const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC132 : GPRC21 ; |
2097 | 153 | const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
2098 | 153 | unsigned Size = TRI.getSpillSize(RC); |
2099 | 153 | unsigned Align = TRI.getSpillAlignment(RC); |
2100 | 153 | RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); |
2101 | 153 | |
2102 | 153 | // Might we have over-aligned allocas? |
2103 | 153 | bool HasAlVars = MFI.hasVarSizedObjects() && |
2104 | 153 | MFI.getMaxAlignment() > getStackAlignment()21 ; |
2105 | 153 | |
2106 | 153 | // These kinds of spills might need two registers. |
// The second slot has the same size and alignment as the first.
2107 | 153 | if (spillsCR(MF) || spillsVRSAVE(MF)135 || HasAlVars135 ) |
2108 | 21 | RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); |
2109 | 153 | |
2110 | 153 | } |
2111 | 11.2k | } |
2112 | | |
2113 | | // This function checks if a callee saved gpr can be spilled to a volatile |
2114 | | // vector register. This occurs for leaf functions when the option |
2115 | | // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers |
2116 | | // which were not spilled to vectors, return false so the target independent |
2117 | | // code can handle them by assigning a FrameIdx to a stack slot. |
//
// Return value, as implemented below:
//   true  - CSI is empty, or every CSI entry received a destination register.
//   false - the option is off, the function has calls, the subtarget lacks
//           P9 vector support, the candidate set ran out, or at least one
//           CSI entry is not a G8RC/GPRC register.
// CSI entries that get a register are updated in place through setDstReg.
2118 | | bool PPCFrameLowering::assignCalleeSavedSpillSlots( |
2119 | | MachineFunction &MF, const TargetRegisterInfo *TRI, |
2120 | 11.2k | std::vector<CalleeSavedInfo> &CSI) const { |
2121 | 11.2k | |
2122 | 11.2k | if (CSI.empty()) |
2123 | 10.8k | return true; // Early exit if no callee saved registers are modified! |
2124 | 435 | |
2125 | 435 | // Early exit if cannot spill gprs to volatile vector registers. |
2126 | 435 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2127 | 435 | if (!EnablePEVectorSpills || MFI.hasCalls()2 || !Subtarget.hasP9Vector()2 ) |
2128 | 433 | return false; |
2129 | 2 | |
2130 | 2 | // Build a BitVector of VSRs that can be used for spilling GPRs. |
2131 | 2 | BitVector BVAllocatable = TRI->getAllocatableSet(MF); |
2132 | 2 | BitVector BVCalleeSaved(TRI->getNumRegs()); |
2133 | 2 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2134 | 2 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); |
// The callee-saved list is walked until a zero entry terminates it.
2135 | 106 | for (unsigned i = 0; CSRegs[i]; ++i104 ) |
2136 | 104 | BVCalleeSaved.set(CSRegs[i]); |
2137 | 2 | |
2138 | 584 | for (unsigned Reg : BVAllocatable.set_bits()) { |
2139 | 584 | // Set to 0 if the register is not a volatile VF/F8 register, or if it is |
2140 | 584 | // used in the function. |
// Here volatile means not present in the callee-saved set built above.
2141 | 584 | if (BVCalleeSaved[Reg] || |
2142 | 584 | (480 !PPC::F8RCRegClass.contains(Reg)480 && |
2143 | 480 | !PPC::VFRCRegClass.contains(Reg)452 ) || |
2144 | 584 | (MF.getRegInfo().isPhysRegUsed(Reg))92 ) |
2145 | 494 | BVAllocatable.reset(Reg); |
2146 | 584 | } |
2147 | 2 | |
// Hand out the remaining candidates to the GPR entries of CSI, one each.
2148 | 2 | bool AllSpilledToReg = true; |
2149 | 7 | for (auto &CS : CSI) { |
// NOTE(review): this early return leaves the destination registers already
// set on earlier entries in place - confirm the caller tolerates that.
2150 | 7 | if (BVAllocatable.none()) |
2151 | 0 | return false; |
2152 | 7 | |
2153 | 7 | unsigned Reg = CS.getReg(); |
// Only general-purpose registers are candidates for a vector-register spill.
2154 | 7 | if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)1 ) { |
2155 | 1 | AllSpilledToReg = false; |
2156 | 1 | continue; |
2157 | 1 | } |
2158 | 6 | |
// The set was checked to be non-empty at the top of this iteration and has
// not been modified since, so the else branch below is a defensive fallback.
2159 | 6 | unsigned VolatileVFReg = BVAllocatable.find_first(); |
2160 | 6 | if (VolatileVFReg < BVAllocatable.size()) { |
2161 | 6 | CS.setDstReg(VolatileVFReg); |
2162 | 6 | BVAllocatable.reset(VolatileVFReg); |
2163 | 6 | } else { |
2164 | 0 | AllSpilledToReg = false; |
2165 | 0 | } |
2166 | 6 | } |
2167 | 2 | return AllSpilledToReg; |
2168 | 2 | } |
2169 | | |
2170 | | |
// Emit the saves for the callee-saved registers listed in CSI, inserting the
// new instructions before MI in MBB.
//
// Returns false without emitting anything when the subtarget is not SVR4;
// otherwise returns true after every CSI entry has been processed.
//
// Handling per entry, in the order the checks appear below:
//   - VRSAVE is skipped unless the ABI is Darwin.
//   - A register that is not already live-in to the function is added as a
//     live-in of MBB. This happens before the skip paths that follow.
//   - On the 32-bit path, CR fields after the first one are attached to the
//     existing MFCR as implicit kills instead of getting their own save.
//   - X2/R2 is skipped when mustSaveTOC() is set.
//   - CR2..CR4: on PPC64 only recorded through addMustSaveCR; on 32-bit an
//     MFCR into R12 followed by an STW of R12 to the frame index of that
//     first CR entry.
//   - Anything else: MTVSRD into the assigned destination register when
//     isSpilledToReg() is true, otherwise storeRegToStackSlot.
2171 | | bool |
2172 | | PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, |
2173 | | MachineBasicBlock::iterator MI, |
2174 | | const std::vector<CalleeSavedInfo> &CSI, |
2175 | 435 | const TargetRegisterInfo *TRI) const { |
2176 | 435 | |
2177 | 435 | // Currently, this function only handles SVR4 32- and 64-bit ABIs. |
2178 | 435 | // Return false otherwise to maintain pre-existing behavior. |
2179 | 435 | if (!Subtarget.isSVR4ABI()) |
2180 | 0 | return false; |
2181 | 435 | |
2182 | 435 | MachineFunction *MF = MBB.getParent(); |
2183 | 435 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
2184 | 435 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
2185 | 435 | bool MustSaveTOC = FI->mustSaveTOC(); |
2186 | 435 | DebugLoc DL; |
// CRSpilled and CRMIB track the single MFCR emitted on the 32-bit path so
// that later CR fields can be added to it.
2187 | 435 | bool CRSpilled = false; |
2188 | 435 | MachineInstrBuilder CRMIB; |
2189 | 435 | |
2190 | 2.93k | for (unsigned i = 0, e = CSI.size(); i != e; ++i2.49k ) { |
2191 | 2.49k | unsigned Reg = CSI[i].getReg(); |
2192 | 2.49k | // Only Darwin actually uses the VRSAVE register, but it can still appear |
2193 | 2.49k | // here if, for example, @llvm.eh.unwind.init() is used. If we're not on |
2194 | 2.49k | // Darwin, ignore it. |
2195 | 2.49k | if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()0 ) |
2196 | 0 | continue; |
2197 | 2.49k | |
2198 | 2.49k | // CR2 through CR4 are the nonvolatile CR fields. |
// NOTE(review): the range test assumes CR2, CR3 and CR4 have consecutive
// enum values - confirm against the generated register definitions.
2199 | 2.49k | bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; |
2200 | 2.49k | |
2201 | 2.49k | // Add the callee-saved register as live-in; it's killed at the spill. |
2202 | 2.49k | // Do not do this for callee-saved registers that are live-in to the |
2203 | 2.49k | // function because they will already be marked live-in and this will be |
2204 | 2.49k | // adding it for a second time. It is an error to add the same register |
2205 | 2.49k | // to the set more than once. |
2206 | 2.49k | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
2207 | 2.49k | bool IsLiveIn = MRI.isLiveIn(Reg); |
2208 | 2.49k | if (!IsLiveIn) |
2209 | 2.49k | MBB.addLiveIn(Reg); |
2210 | 2.49k | |
// A CR save was already emitted: fold this field into that MFCR.
2211 | 2.49k | if (CRSpilled && IsCRField19 ) { |
2212 | 8 | CRMIB.addReg(Reg, RegState::ImplicitKill); |
2213 | 8 | continue; |
2214 | 8 | } |
2215 | 2.49k | |
2216 | 2.49k | // The actual spill will happen in the prologue. |
2217 | 2.49k | if ((Reg == PPC::X2 || Reg == PPC::R22.48k ) && MustSaveTOC6 ) |
2218 | 0 | continue; |
2219 | 2.49k | |
2220 | 2.49k | // Insert the spill to the stack frame. |
2221 | 2.49k | if (IsCRField) { |
// Same accessor call that produced FI above.
2222 | 91 | PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); |
2223 | 91 | if (Subtarget.isPPC64()) { |
2224 | 80 | // The actual spill will happen at the start of the prologue. |
2225 | 80 | FuncInfo->addMustSaveCR(Reg); |
2226 | 80 | } else { |
2227 | 11 | CRSpilled = true; |
2228 | 11 | FuncInfo->setSpillsCR(); |
2229 | 11 | |
2230 | 11 | // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have |
2231 | 11 | // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. |
2232 | 11 | CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) |
2233 | 11 | .addReg(Reg, RegState::ImplicitKill); |
2234 | 11 | |
2235 | 11 | MBB.insert(MI, CRMIB); |
2236 | 11 | MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) |
2237 | 11 | .addReg(PPC::R12, |
2238 | 11 | getKillRegState(true)), |
2239 | 11 | CSI[i].getFrameIdx())); |
2240 | 11 | } |
2241 | 2.39k | } else { |
2242 | 2.39k | if (CSI[i].isSpilledToReg()) { |
// Register-to-register save; the source register is marked killed.
2243 | 6 | NumPESpillVSR++; |
2244 | 6 | BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) |
2245 | 6 | .addReg(Reg, getKillRegState(true)); |
2246 | 2.39k | } else { |
2247 | 2.39k | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
2248 | 2.39k | // Use !IsLiveIn for the kill flag. |
2249 | 2.39k | // We do not want to kill registers that are live in this function |
2250 | 2.39k | // before their use because they will become undefined registers. |
2251 | 2.39k | TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, |
2252 | 2.39k | CSI[i].getFrameIdx(), RC, TRI); |
2253 | 2.39k | } |
2254 | 2.39k | } |
2255 | 2.49k | } |
2256 | 435 | return true; |
2257 | 435 | } |
2258 | | |
// Reload the saved condition-register fields, inserting the new instructions
// before MI in MBB. Only the 32-bit path emits code.
//
// isPPC64:   when true the function returns immediately without inserting
//            anything.
// is31:      not referenced anywhere in this function body.
// CR2Spilled, CR3Spilled, CR4Spilled: select which fields receive an MTOCRF.
// CSI, CSIIndex: CSI[CSIIndex] supplies the frame index that the LWZ reads.
//
// The kill flag on R12 is placed on whichever MTOCRF is emitted last.
2259 | | static void |
2260 | | restoreCRs(bool isPPC64, bool is31, |
2261 | | bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, |
2262 | | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
2263 | 45 | const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { |
2264 | 45 | |
2265 | 45 | MachineFunction *MF = MBB.getParent(); |
2266 | 45 | const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); |
2267 | 45 | DebugLoc DL; |
2268 | 45 | unsigned RestoreOp, MoveReg; |
2269 | 45 | |
2270 | 45 | if (isPPC64) |
2271 | 32 | // This is handled during epilogue generation. |
2272 | 32 | return; |
2273 | 13 | else { |
2274 | 13 | // 32-bit: FP-relative |
// Load the saved word into R12; the moves below copy fields out of it.
2275 | 13 | MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), |
2276 | 13 | PPC::R12), |
2277 | 13 | CSI[CSIIndex].getFrameIdx())); |
2278 | 13 | RestoreOp = PPC::MTOCRF; |
2279 | 13 | MoveReg = PPC::R12; |
2280 | 13 | } |
2281 | 45 | |
// R12 is killed here only when neither CR3 nor CR4 follows.
2282 | 45 | if (13 CR2Spilled13 ) |
2283 | 13 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) |
2284 | 13 | .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled8 ))); |
2285 | 13 | |
// R12 is killed here only when CR4 does not follow.
2286 | 13 | if (CR3Spilled) |
2287 | 5 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) |
2288 | 5 | .addReg(MoveReg, getKillRegState(!CR4Spilled))); |
2289 | 13 | |
// CR4 is always the last move when present, so it always kills R12.
2290 | 13 | if (CR4Spilled) |
2291 | 3 | MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) |
2292 | 3 | .addReg(MoveReg, getKillRegState(true))); |
2293 | 13 | } |
2294 | | |
// Remove the call-frame pseudo instruction at I from MBB and return the
// iterator produced by MBB.erase(I).
//
// When GuaranteedTailCallOpt is enabled and I is an ADJCALLSTACKUP whose
// operand 1 holds a nonzero immediate, the stack pointer is first adjusted by
// the negated immediate: a single ADDI when the value fits in 16 signed bits,
// otherwise LIS + ORI into a temporary register followed by an ADD.
2295 | | MachineBasicBlock::iterator PPCFrameLowering:: |
2296 | | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
2297 | 4.54k | MachineBasicBlock::iterator I) const { |
2298 | 4.54k | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
2299 | 4.54k | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
2300 | 4.54k | I->getOpcode() == PPC::ADJCALLSTACKUP6 ) { |
2301 | 3 | // Add (actually subtract) back the amount the callee popped on return. |
2302 | 3 | if (int CalleeAmt = I->getOperand(1).getImm()) { |
2303 | 0 | bool is64Bit = Subtarget.isPPC64(); |
2304 | 0 | CalleeAmt *= -1; |
// Pick the 64-bit or 32-bit form of each register and opcode.
2305 | 0 | unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; |
2306 | 0 | unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; |
2307 | 0 | unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; |
2308 | 0 | unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; |
2309 | 0 | unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; |
2310 | 0 | unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; |
2311 | 0 | const DebugLoc &dl = I->getDebugLoc(); |
2312 | 0 |

2313 | 0 | if (isInt<16>(CalleeAmt)) { |
2314 | 0 | BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) |
2315 | 0 | .addReg(StackReg, RegState::Kill) |
2316 | 0 | .addImm(CalleeAmt); |
2317 | 0 | } else { |
// Build the value in TmpReg from its upper and lower 16-bit halves.
// NOTE(review): CalleeAmt may be negative after the negation above, and
// right-shifting a negative int is implementation-defined before C++20.
2318 | 0 | MachineBasicBlock::iterator MBBI = I; |
2319 | 0 | BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) |
2320 | 0 | .addImm(CalleeAmt >> 16); |
2321 | 0 | BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) |
2322 | 0 | .addReg(TmpReg, RegState::Kill) |
2323 | 0 | .addImm(CalleeAmt & 0xFFFF); |
2324 | 0 | BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) |
2325 | 0 | .addReg(StackReg, RegState::Kill) |
2326 | 0 | .addReg(TmpReg); |
2327 | 0 | } |
2328 | 0 | } |
2329 | 3 | } |
2330 | 4.54k | // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. |
2331 | 4.54k | return MBB.erase(I); |
2332 | 4.54k | } |
2333 | | |
// Emit the reloads for the callee-saved registers listed in CSI, inserting
// the new instructions ahead of MI in MBB.
//
// Returns false without emitting anything when the subtarget is not SVR4;
// otherwise returns true after every CSI entry has been processed.
//
// CR2, CR3 and CR4 entries are only recorded while walking CSI. They are
// restored together through restoreCRs, either when the next non-CR entry is
// reached or after the loop. Other entries are reloaded with MFVSRD when
// isSpilledToReg() is true and with loadRegFromStackSlot otherwise. VRSAVE on
// a non-Darwin ABI and X2/R2 with mustSaveTOC() set are skipped.
2334 | | bool |
2335 | | PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, |
2336 | | MachineBasicBlock::iterator MI, |
2337 | | std::vector<CalleeSavedInfo> &CSI, |
2338 | 433 | const TargetRegisterInfo *TRI) const { |
2339 | 433 | |
2340 | 433 | // Currently, this function only handles SVR4 32- and 64-bit ABIs. |
2341 | 433 | // Return false otherwise to maintain pre-existing behavior. |
2342 | 433 | if (!Subtarget.isSVR4ABI()) |
2343 | 0 | return false; |
2344 | 433 | |
2345 | 433 | MachineFunction *MF = MBB.getParent(); |
2346 | 433 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
2347 | 433 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
2348 | 433 | bool MustSaveTOC = FI->mustSaveTOC(); |
2349 | 433 | bool CR2Spilled = false; |
2350 | 433 | bool CR3Spilled = false; |
2351 | 433 | bool CR4Spilled = false; |
2352 | 433 | unsigned CSIIndex = 0; |
2353 | 433 | |
2354 | 433 | // Initialize insertion-point logic; we will be restoring in reverse |
2355 | 433 | // order of spill. |
// BeforeI remembers the instruction just ahead of MI, when one exists, so the
// insertion point can be moved back in front of everything inserted so far.
2356 | 433 | MachineBasicBlock::iterator I = MI, BeforeI = I; |
2357 | 433 | bool AtStart = I == MBB.begin(); |
2358 | 433 | |
2359 | 433 | if (!AtStart) |
2360 | 335 | --BeforeI; |
2361 | 433 | |
2362 | 2.89k | for (unsigned i = 0, e = CSI.size(); i != e; ++i2.46k ) { |
2363 | 2.46k | unsigned Reg = CSI[i].getReg(); |
2364 | 2.46k | |
2365 | 2.46k | // Only Darwin actually uses the VRSAVE register, but it can still appear |
2366 | 2.46k | // here if, for example, @llvm.eh.unwind.init() is used. If we're not on |
2367 | 2.46k | // Darwin, ignore it. |
2368 | 2.46k | if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()0 ) |
2369 | 0 | continue; |
2370 | 2.46k | |
// Mirrors the skip in spillCalleeSavedRegisters for the TOC register.
2371 | 2.46k | if ((Reg == PPC::X2 || Reg == PPC::R22.46k ) && MustSaveTOC6 ) |
2372 | 0 | continue; |
2373 | 2.46k | |
2374 | 2.46k | if (Reg == PPC::CR2) { |
2375 | 45 | CR2Spilled = true; |
2376 | 45 | // The spill slot is associated only with CR2, which is the |
2377 | 45 | // first nonvolatile spilled. Save it here. |
2378 | 45 | CSIIndex = i; |
2379 | 45 | continue; |
2380 | 2.42k | } else if (Reg == PPC::CR3) { |
2381 | 29 | CR3Spilled = true; |
2382 | 29 | continue; |
2383 | 2.39k | } else if (Reg == PPC::CR4) { |
2384 | 27 | CR4Spilled = true; |
2385 | 27 | continue; |
2386 | 2.36k | } else { |
2387 | 2.36k | // When we first encounter a non-CR register after seeing at |
2388 | 2.36k | // least one CR register, restore all spilled CRs together. |
// NOTE(review): CR2, CR3 and CR4 were all handled by the branches above, so
// the range test on Reg looks redundant here, assuming no other register
// value lies between CR2 and CR4 - confirm.
2389 | 2.36k | if ((CR2Spilled || CR3Spilled2.35k || CR4Spilled2.35k ) |
2390 | 2.36k | && !(13 PPC::CR2 <= Reg13 && Reg <= PPC::CR413 )) { |
2391 | 13 | bool is31 = needsFP(*MF); |
2392 | 13 | restoreCRs(Subtarget.isPPC64(), is31, |
2393 | 13 | CR2Spilled, CR3Spilled, CR4Spilled, |
2394 | 13 | MBB, I, CSI, CSIIndex); |
// Clear the flags so the same CR fields are not restored a second time.
2395 | 13 | CR2Spilled = CR3Spilled = CR4Spilled = false; |
2396 | 13 | } |
2397 | 2.36k | |
2398 | 2.36k | if (CSI[i].isSpilledToReg()) { |
2399 | 6 | DebugLoc DL; |
2400 | 6 | NumPEReloadVSR++; |
2401 | 6 | BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) |
2402 | 6 | .addReg(CSI[i].getDstReg(), getKillRegState(true)); |
2403 | 2.35k | } else { |
2404 | 2.35k | // Default behavior for non-CR saves. |
2405 | 2.35k | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
2406 | 2.35k | TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); |
2407 | 2.35k | assert(I != MBB.begin() && |
2408 | 2.35k | "loadRegFromStackSlot didn't insert any code!"); |
2409 | 2.35k | } |
2410 | 2.36k | } |
2411 | 2.46k | |
2412 | 2.46k | // Insert in reverse order. |
// Reset I to the first instruction after BeforeI, or to the block start, so
// the next reload lands ahead of the ones already inserted. The CR branches
// above continue past this step because they insert nothing themselves.
2413 | 2.46k | if (2.36k AtStart2.36k ) |
2414 | 471 | I = MBB.begin(); |
2415 | 1.89k | else { |
2416 | 1.89k | I = BeforeI; |
2417 | 1.89k | ++I; |
2418 | 1.89k | } |
2419 | 2.36k | } |
2420 | 433 | |
2421 | 433 | // If we haven't yet restored the CRs, do so now. |
2422 | 433 | if (CR2Spilled || CR3Spilled401 || CR4Spilled401 ) { |
2423 | 32 | bool is31 = needsFP(*MF); |
2424 | 32 | restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, |
2425 | 32 | MBB, I, CSI, CSIIndex); |
2426 | 32 | } |
2427 | 433 | |
2428 | 433 | return true; |
2429 | 433 | } |
2430 | | |
// Report whether shrink wrapping may be applied to MF. Returns false when the
// function info has shrinkWrapDisabled() set; otherwise returns true only
// when the subtarget is both SVR4 and PPC64.
2431 | 10.4k | bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
2432 | 10.4k | if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) |
2433 | 3 | return false; |
2434 | 10.4k | return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && |
2435 | 10.4k | MF.getSubtarget<PPCSubtarget>().isPPC64()); |
2436 | 10.4k | } |