/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the Thumb1 implementation of TargetFrameLowering class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "Thumb1FrameLowering.h" |
14 | | #include "ARMBaseInstrInfo.h" |
15 | | #include "ARMBaseRegisterInfo.h" |
16 | | #include "ARMMachineFunctionInfo.h" |
17 | | #include "ARMSubtarget.h" |
18 | | #include "Thumb1InstrInfo.h" |
19 | | #include "ThumbRegisterInfo.h" |
20 | | #include "Utils/ARMBaseInfo.h" |
21 | | #include "llvm/ADT/BitVector.h" |
22 | | #include "llvm/ADT/STLExtras.h" |
23 | | #include "llvm/ADT/SmallVector.h" |
24 | | #include "llvm/CodeGen/LivePhysRegs.h" |
25 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
26 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
27 | | #include "llvm/CodeGen/MachineFunction.h" |
28 | | #include "llvm/CodeGen/MachineInstr.h" |
29 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
30 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
31 | | #include "llvm/CodeGen/MachineOperand.h" |
32 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
33 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
34 | | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
36 | | #include "llvm/IR/DebugLoc.h" |
37 | | #include "llvm/MC/MCContext.h" |
38 | | #include "llvm/MC/MCDwarf.h" |
39 | | #include "llvm/MC/MCRegisterInfo.h" |
40 | | #include "llvm/Support/Compiler.h" |
41 | | #include "llvm/Support/ErrorHandling.h" |
42 | | #include "llvm/Support/MathExtras.h" |
43 | | #include <bitset> |
44 | | #include <cassert> |
45 | | #include <iterator> |
46 | | #include <vector> |
47 | | |
48 | | using namespace llvm; |
49 | | |
// Thumb1 frame lowering shares all of its state with the common ARM frame
// lowering; the Thumb1-specific behaviour lives in the overridden methods.
Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
    : ARMFrameLowering(sti) {}
52 | | |
53 | 29.8k | bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ |
54 | 29.8k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
55 | 29.8k | unsigned CFSize = MFI.getMaxCallFrameSize(); |
56 | 29.8k | // It's not always a good idea to include the call frame as part of the |
57 | 29.8k | // stack frame. ARM (especially Thumb) has small immediate offset to |
58 | 29.8k | // address the stack frame. So a large call frame can cause poor codegen |
59 | 29.8k | // and may even makes it impossible to scavenge a register. |
60 | 29.8k | if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 |
61 | 303 | return false; |
62 | 29.5k | |
63 | 29.5k | return !MFI.hasVarSizedObjects(); |
64 | 29.5k | } |
65 | | |
66 | | static void |
67 | | emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, |
68 | | MachineBasicBlock::iterator &MBBI, |
69 | | const TargetInstrInfo &TII, const DebugLoc &dl, |
70 | | const ThumbRegisterInfo &MRI, int NumBytes, |
71 | 1.36k | unsigned ScratchReg, unsigned MIFlags) { |
72 | 1.36k | // If it would take more than three instructions to adjust the stack pointer |
73 | 1.36k | // using tADDspi/tSUBspi, load an immediate instead. |
74 | 1.36k | if (std::abs(NumBytes) > 508 * 3) { |
75 | 61 | // We use a different codepath here from the normal |
76 | 61 | // emitThumbRegPlusImmediate so we don't have to deal with register |
77 | 61 | // scavenging. (Scavenging could try to use the emergency spill slot |
78 | 61 | // before we've actually finished setting up the stack.) |
79 | 61 | if (ScratchReg == ARM::NoRegister) |
80 | 0 | report_fatal_error("Failed to emit Thumb1 stack adjustment"); |
81 | 61 | MachineFunction &MF = *MBB.getParent(); |
82 | 61 | const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); |
83 | 61 | if (ST.genExecuteOnly()) { |
84 | 4 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg) |
85 | 4 | .addImm(NumBytes).setMIFlags(MIFlags); |
86 | 57 | } else { |
87 | 57 | MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, |
88 | 57 | 0, MIFlags); |
89 | 57 | } |
90 | 61 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) |
91 | 61 | .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill) |
92 | 61 | .add(predOps(ARMCC::AL)); |
93 | 61 | return; |
94 | 61 | } |
95 | 1.30k | // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate |
96 | 1.30k | // won't change. |
97 | 1.30k | emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, |
98 | 1.30k | MRI, MIFlags); |
99 | 1.30k | |
100 | 1.30k | } |
101 | | |
/// Adjust SP around a call-frame pseudo (ADJCALLSTACKDOWN/UP).
/// Thin wrapper over emitThumbRegPlusImmediate; unlike
/// emitPrologueEpilogueSPUpdate it has no scratch-register fallback —
/// presumably normal register scavenging is usable at call sites (see the
/// scavenging note in emitPrologueEpilogueSPUpdate).
static void emitCallSPUpdate(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI,
                             const TargetInstrInfo &TII, const DebugLoc &dl,
                             const ThumbRegisterInfo &MRI, int NumBytes,
                             unsigned MIFlags = MachineInstr::NoFlags) {
  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
                            MRI, MIFlags);
}
110 | | |
111 | | |
112 | | MachineBasicBlock::iterator Thumb1FrameLowering:: |
113 | | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
114 | 3.30k | MachineBasicBlock::iterator I) const { |
115 | 3.30k | const Thumb1InstrInfo &TII = |
116 | 3.30k | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
117 | 3.30k | const ThumbRegisterInfo *RegInfo = |
118 | 3.30k | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
119 | 3.30k | if (!hasReservedCallFrame(MF)) { |
120 | 136 | // If we have alloca, convert as follows: |
121 | 136 | // ADJCALLSTACKDOWN -> sub, sp, sp, amount |
122 | 136 | // ADJCALLSTACKUP -> add, sp, sp, amount |
123 | 136 | MachineInstr &Old = *I; |
124 | 136 | DebugLoc dl = Old.getDebugLoc(); |
125 | 136 | unsigned Amount = TII.getFrameSize(Old); |
126 | 136 | if (Amount != 0) { |
127 | 22 | // We need to keep the stack aligned properly. To do this, we round the |
128 | 22 | // amount of space needed for the outgoing arguments up to the next |
129 | 22 | // alignment boundary. |
130 | 22 | Amount = alignTo(Amount, getStackAlignment()); |
131 | 22 | |
132 | 22 | // Replace the pseudo instruction with a new instruction... |
133 | 22 | unsigned Opc = Old.getOpcode(); |
134 | 22 | if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN11 ) { |
135 | 11 | emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); |
136 | 11 | } else { |
137 | 11 | assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); |
138 | 11 | emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); |
139 | 11 | } |
140 | 22 | } |
141 | 136 | } |
142 | 3.30k | return MBB.erase(I); |
143 | 3.30k | } |
144 | | |
/// Emit the Thumb1 function prologue into MBB: spill-area accounting, SP
/// adjustments, frame-pointer setup, CFI directives for the unwinder, stack
/// realignment, and base-pointer setup. The ordering of the emitted
/// instructions and CFI records is significant.
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  assert(NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned BasePtr = RegInfo->getBaseRegister();
  // Running offset of the CFA relative to SP; grows negative as we push.
  int CFAOffset = 0;

  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
  NumBytes = (NumBytes + 3) & ~3;
  MFI.setStackSize(NumBytes);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;

  // Space reserved for saving incoming argument registers (varargs).
  if (ArgRegsSaveSize) {
    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                                 ARM::NoRegister, MachineInstr::FrameSetup);
    CFAOffset -= ArgRegsSaveSize;
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // Leaf-style functions with no stack frame: just drop SP and return.
  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
                                   -(NumBytes - ArgRegsSaveSize),
                                   ARM::NoRegister, MachineInstr::FrameSetup);
      CFAOffset -= NumBytes - ArgRegsSaveSize;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Classify each callee-saved register into its spill area and note the
  // frame index holding the frame pointer's save slot.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
      // High registers go to area 2 when push/pop is split (r8-r11 cannot be
      // pushed directly by Thumb1's tPUSH).
      if (STI.splitFramePushPop(MF)) {
        GPRCS2Size += 4;
        break;
      }
      LLVM_FALLTHROUGH;
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      GPRCS1Size += 4;
      break;
    default:
      DPRCSSize += 8;
    }
  }

  // Skip over the tPUSH the spill code already emitted, if present.
  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
    ++MBBI;
  }

  // Determine starting offsets of spill areas.
  unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
  bool HasFP = hasFP(MF);
  if (HasFP)
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  NumBytes = DPRCSOffset;

  // Try folding the remaining SP adjustment into the preceding push.
  int FramePtrOffsetInBlock = 0;
  unsigned adjustedGPRCS1Size = GPRCS1Size;
  if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
      tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
    FramePtrOffsetInBlock = NumBytes;
    adjustedGPRCS1Size += NumBytes;
    NumBytes = 0;
  }

  // CFI: the CFA moved by the (possibly widened) push.
  if (adjustedGPRCS1Size) {
    CFAOffset -= adjustedGPRCS1Size;
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // CFI: record the save slot of each low callee-saved register (the high
  // registers, when split, are handled after their spill code below).
  for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
       E = CSI.end(); I != E; ++I) {
    unsigned Reg = I->getReg();
    int FI = I->getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      if (STI.splitFramePushPop(MF))
        break;
      LLVM_FALLTHROUGH;
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }
  }

  // Adjust FP so it point to the stack slot that contains the previous FP.
  if (HasFP) {
    FramePtrOffsetInBlock +=
        MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
        .addReg(ARM::SP)
        .addImm(FramePtrOffsetInBlock / 4)
        .setMIFlags(MachineInstr::FrameSetup)
        .add(predOps(ARMCC::AL));
    if(FramePtrOffsetInBlock) {
      CFAOffset += FramePtrOffsetInBlock;
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    if (NumBytes > 508)
      // If offset is > 508 then sp cannot be adjusted in a single instruction,
      // try restoring from fp instead.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
  // and tMOVr instructions. We don't need to add any call frame information
  // in-between these instructions, because they do not modify the high
  // registers.
  while (true) {
    MachineBasicBlock::iterator OldMBBI = MBBI;
    // Skip a run of tMOVr instructions
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
      MBBI++;
    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
      MBBI++;
    } else {
      // We have reached an instruction which is not a push, so the previous
      // run of tMOVr instructions (which may have been empty) was not part of
      // the prologue. Reset MBBI back to the last PUSH of the prologue.
      MBBI = OldMBBI;
      break;
    }
  }

  // Emit call frame information for the callee-saved high registers.
  for (auto &I : CSI) {
    unsigned Reg = I.getReg();
    int FI = I.getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12: {
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }
    default:
      break;
    }
  }

  // Allocate the remaining local-variable area.
  if (NumBytes) {
    // Insert it after all the callee-save spills.
    //
    // For a large stack frame, we might need a scratch register to store
    // the size of the frame. We know all callee-save registers are free
    // at this point in the prologue, so pick one.
    unsigned ScratchRegister = ARM::NoRegister;
    for (auto &I : CSI) {
      unsigned Reg = I.getReg();
      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
        ScratchRegister = Reg;
        break;
      }
    }
    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
                                 ScratchRegister, MachineInstr::FrameSetup);
    if (!HasFP) {
      // With an FP the CFA is already FP-relative; otherwise track SP.
      CFAOffset -= NumBytes;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  if (STI.isTargetELF() && HasFP)
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  if (RegInfo->needsStackRealignment(MF)) {
    const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
    // Emit the following sequence, using R4 as a temporary, since we cannot use
    // SP as a source or destination register for the shifts:
    // mov r4, sp
    // lsrs r4, r4, #NrBitsToZero
    // lsls r4, r4, #NrBitsToZero
    // mov sp, r4
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
        .addReg(ARM::SP, RegState::Kill)
        .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
        .addDef(ARM::CPSR)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(NrBitsToZero)
        .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
        .addDef(ARM::CPSR)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(NrBitsToZero)
        .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
        .addReg(ARM::R4, RegState::Kill)
        .add(predOps(ARMCC::AL));

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF))
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);

  // In some cases, virtual registers have been introduced, e.g. by uses of
  // emitThumbRegPlusImmInReg.
  MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
454 | | |
455 | 1.76k | static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { |
456 | 1.76k | if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI()14 && |
457 | 1.76k | isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)14 ) |
458 | 0 | return true; |
459 | 1.76k | else if (MI.getOpcode() == ARM::tPOP) { |
460 | 231 | return true; |
461 | 1.53k | } else if (MI.getOpcode() == ARM::tMOVr) { |
462 | 55 | unsigned Dst = MI.getOperand(0).getReg(); |
463 | 55 | unsigned Src = MI.getOperand(1).getReg(); |
464 | 55 | return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR0 ) && |
465 | 55 | ARM::hGPRRegClass.contains(Dst)); |
466 | 55 | } |
467 | 1.47k | return false; |
468 | 1.47k | } |
469 | | |
/// Emit the Thumb1 function epilogue into MBB: rewind to the start of the
/// callee-saved restore sequence, undo the SP adjustments made by the
/// prologue (possibly via the frame pointer), and emit the special LR
/// fix-up when tPOP cannot restore LR directly.
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI.getStackSize();
  assert((unsigned)NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    // No frame: just pop the local area (minus the varargs save area, which
    // the LR fix-up below handles).
    if (NumBytes - ArgRegsSaveSize != 0)
      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
                                   NumBytes - ArgRegsSaveSize, ARM::NoRegister,
                                   MachineInstr::NoFlags);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
      if (!isCSRestore(*MBBI, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize() +
                 ArgRegsSaveSize);

    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      // Reset SP based on frame pointer only if the stack frame extends beyond
      // frame pointer stack slot, the target is ELF and the function has FP, or
      // the target uses var sized objects.
      if (NumBytes) {
        // R4 is used as scratch to compute FP - NumBytes before moving to SP.
        assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                  TII, *RegInfo);
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
            .addReg(ARM::R4)
            .add(predOps(ARMCC::AL));
      } else
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
            .addReg(FramePtr)
            .add(predOps(ARMCC::AL));
    } else {
      // For a large stack frame, we might need a scratch register to store
      // the size of the frame. We know all callee-save registers are free
      // at this point in the epilogue, so pick one.
      unsigned ScratchRegister = ARM::NoRegister;
      bool HasFP = hasFP(MF);
      for (auto &I : MFI.getCalleeSavedInfo()) {
        unsigned Reg = I.getReg();
        if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
          ScratchRegister = Reg;
          break;
        }
      }
      // Prefer folding the SP bump into the tPOP preceding a tBX_RET;
      // otherwise try folding at MBBI, and fall back to an explicit update.
      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
          &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
        MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
        if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
          emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
                                       ScratchRegister, MachineInstr::NoFlags);
      } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
        emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
                                     ScratchRegister, MachineInstr::NoFlags);
    }
  }

  // LR cannot be popped directly in Thumb1; emit the fix-up if required.
  if (needPopSpecialFixUp(MF)) {
    bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
    (void)Done;
    assert(Done && "Emission of the special fixup failed!?");
  }
}
557 | | |
558 | 20 | bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
559 | 20 | if (!needPopSpecialFixUp(*MBB.getParent())) |
560 | 20 | return true; |
561 | 0 | |
562 | 0 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
563 | 0 | return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); |
564 | 0 | } |
565 | | |
566 | 1.66k | bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { |
567 | 1.66k | ARMFunctionInfo *AFI = |
568 | 1.66k | const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); |
569 | 1.66k | if (AFI->getArgRegsSaveSize()) |
570 | 18 | return true; |
571 | 1.64k | |
572 | 1.64k | // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. |
573 | 1.64k | for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) |
574 | 901 | if (CSI.getReg() == ARM::LR) |
575 | 894 | return true; |
576 | 1.64k | |
577 | 1.64k | return false751 ; |
578 | 1.64k | } |
579 | | |
580 | | static void findTemporariesForLR(const BitVector &GPRsNoLRSP, |
581 | | const BitVector &PopFriendly, |
582 | | const LivePhysRegs &UsedRegs, unsigned &PopReg, |
583 | 217 | unsigned &TmpReg) { |
584 | 217 | PopReg = TmpReg = 0; |
585 | 507 | for (auto Reg : GPRsNoLRSP.set_bits()) { |
586 | 507 | if (!UsedRegs.contains(Reg)) { |
587 | 217 | // Remember the first pop-friendly register and exit. |
588 | 217 | if (PopFriendly.test(Reg)) { |
589 | 209 | PopReg = Reg; |
590 | 209 | TmpReg = 0; |
591 | 209 | break; |
592 | 209 | } |
593 | 8 | // Otherwise, remember that the register will be available to |
594 | 8 | // save a pop-friendly register. |
595 | 8 | TmpReg = Reg; |
596 | 8 | } |
597 | 507 | } |
598 | 217 | } |
599 | | |
600 | | bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, |
601 | 912 | bool DoIt) const { |
602 | 912 | MachineFunction &MF = *MBB.getParent(); |
603 | 912 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
604 | 912 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
605 | 912 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
606 | 912 | const ThumbRegisterInfo *RegInfo = |
607 | 912 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
608 | 912 | |
609 | 912 | // If MBBI is a return instruction, or is a tPOP followed by a return |
610 | 912 | // instruction in the successor BB, we may be able to directly restore |
611 | 912 | // LR in the PC. |
612 | 912 | // This is only possible with v5T ops (v4T can't change the Thumb bit via |
613 | 912 | // a POP PC instruction), and only if we do not need to emit any SP update. |
614 | 912 | // Otherwise, we need a temporary register to pop the value |
615 | 912 | // and copy that value into LR. |
616 | 912 | auto MBBI = MBB.getFirstTerminator(); |
617 | 912 | bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize719 ; |
618 | 912 | if (CanRestoreDirectly) { |
619 | 710 | if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB707 ) |
620 | 701 | CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || |
621 | 701 | MBBI->getOpcode() == ARM::tPOP_RET); |
622 | 9 | else { |
623 | 9 | auto MBBI_prev = MBBI; |
624 | 9 | MBBI_prev--; |
625 | 9 | assert(MBBI_prev->getOpcode() == ARM::tPOP); |
626 | 9 | assert(MBB.succ_size() == 1); |
627 | 9 | if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) |
628 | 8 | MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. |
629 | 1 | else |
630 | 1 | CanRestoreDirectly = false; |
631 | 9 | } |
632 | 710 | } |
633 | 912 | |
634 | 912 | if (CanRestoreDirectly) { |
635 | 703 | if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) |
636 | 695 | return true; |
637 | 8 | MachineInstrBuilder MIB = |
638 | 8 | BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) |
639 | 8 | .add(predOps(ARMCC::AL)); |
640 | 8 | // Copy implicit ops and popped registers, if any. |
641 | 8 | for (auto MO: MBBI->operands()) |
642 | 40 | if (MO.isReg() && (32 MO.isImplicit()32 || MO.isDef()16 )) |
643 | 24 | MIB.add(MO); |
644 | 8 | MIB.addReg(ARM::PC, RegState::Define); |
645 | 8 | // Erase the old instruction (tBX_RET or tPOP). |
646 | 8 | MBB.erase(MBBI); |
647 | 8 | return true; |
648 | 8 | } |
649 | 209 | |
650 | 209 | // Look for a temporary register to use. |
651 | 209 | // First, compute the liveness information. |
652 | 209 | const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); |
653 | 209 | LivePhysRegs UsedRegs(TRI); |
654 | 209 | UsedRegs.addLiveOuts(MBB); |
655 | 209 | // The semantic of pristines changed recently and now, |
656 | 209 | // the callee-saved registers that are touched in the function |
657 | 209 | // are not part of the pristines set anymore. |
658 | 209 | // Add those callee-saved now. |
659 | 209 | const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); |
660 | 3.72k | for (unsigned i = 0; CSRegs[i]; ++i3.52k ) |
661 | 3.52k | UsedRegs.addReg(CSRegs[i]); |
662 | 209 | |
663 | 209 | DebugLoc dl = DebugLoc(); |
664 | 209 | if (MBBI != MBB.end()) { |
665 | 205 | dl = MBBI->getDebugLoc(); |
666 | 205 | auto InstUpToMBBI = MBB.end(); |
667 | 410 | while (InstUpToMBBI != MBBI) |
668 | 205 | // The pre-decrement is on purpose here. |
669 | 205 | // We want to have the liveness right before MBBI. |
670 | 205 | UsedRegs.stepBackward(*--InstUpToMBBI); |
671 | 205 | } |
672 | 209 | |
673 | 209 | // Look for a register that can be directly use in the POP. |
674 | 209 | unsigned PopReg = 0; |
675 | 209 | // And some temporary register, just in case. |
676 | 209 | unsigned TemporaryReg = 0; |
677 | 209 | BitVector PopFriendly = |
678 | 209 | TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); |
679 | 209 | // R7 may be used as a frame pointer, hence marked as not generally |
680 | 209 | // allocatable, however there's no reason to not use it as a temporary for |
681 | 209 | // restoring LR. |
682 | 209 | if (STI.useR7AsFramePointer()) |
683 | 209 | PopFriendly.set(ARM::R7); |
684 | 209 | |
685 | 209 | assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); |
686 | 209 | // Rebuild the GPRs from the high registers because they are removed |
687 | 209 | // form the GPR reg class for thumb1. |
688 | 209 | BitVector GPRsNoLRSP = |
689 | 209 | TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); |
690 | 209 | GPRsNoLRSP |= PopFriendly; |
691 | 209 | GPRsNoLRSP.reset(ARM::LR); |
692 | 209 | GPRsNoLRSP.reset(ARM::SP); |
693 | 209 | GPRsNoLRSP.reset(ARM::PC); |
694 | 209 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); |
695 | 209 | |
696 | 209 | // If we couldn't find a pop-friendly register, try restoring LR before |
697 | 209 | // popping the other callee-saved registers, so we could use one of them as a |
698 | 209 | // temporary. |
699 | 209 | bool UseLDRSP = false; |
700 | 209 | if (!PopReg && MBBI != MBB.begin()8 ) { |
701 | 8 | auto PrevMBBI = MBBI; |
702 | 8 | PrevMBBI--; |
703 | 8 | if (PrevMBBI->getOpcode() == ARM::tPOP) { |
704 | 8 | UsedRegs.stepBackward(*PrevMBBI); |
705 | 8 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); |
706 | 8 | if (PopReg) { |
707 | 8 | MBBI = PrevMBBI; |
708 | 8 | UseLDRSP = true; |
709 | 8 | } |
710 | 8 | } |
711 | 8 | } |
712 | 209 | |
713 | 209 | if (!DoIt && !PopReg0 && !TemporaryReg0 ) |
714 | 0 | return false; |
715 | 209 | |
716 | 209 | assert((PopReg || TemporaryReg) && "Cannot get LR"); |
717 | 209 | |
718 | 209 | if (UseLDRSP) { |
719 | 8 | assert(PopReg && "Do not know how to get LR"); |
720 | 8 | // Load the LR via LDR tmp, [SP, #off] |
721 | 8 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) |
722 | 8 | .addReg(PopReg, RegState::Define) |
723 | 8 | .addReg(ARM::SP) |
724 | 8 | .addImm(MBBI->getNumExplicitOperands() - 2) |
725 | 8 | .add(predOps(ARMCC::AL)); |
726 | 8 | // Move from the temporary register to the LR. |
727 | 8 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
728 | 8 | .addReg(ARM::LR, RegState::Define) |
729 | 8 | .addReg(PopReg, RegState::Kill) |
730 | 8 | .add(predOps(ARMCC::AL)); |
731 | 8 | // Advance past the pop instruction. |
732 | 8 | MBBI++; |
733 | 8 | // Increment the SP. |
734 | 8 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, |
735 | 8 | ArgRegsSaveSize + 4, ARM::NoRegister, |
736 | 8 | MachineInstr::NoFlags); |
737 | 8 | return true; |
738 | 8 | } |
739 | 201 | |
740 | 201 | if (TemporaryReg) { |
741 | 0 | assert(!PopReg && "Unnecessary MOV is about to be inserted"); |
742 | 0 | PopReg = PopFriendly.find_first(); |
743 | 0 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
744 | 0 | .addReg(TemporaryReg, RegState::Define) |
745 | 0 | .addReg(PopReg, RegState::Kill) |
746 | 0 | .add(predOps(ARMCC::AL)); |
747 | 0 | } |
748 | 201 | |
749 | 201 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET197 ) { |
750 | 0 | // We couldn't use the direct restoration above, so |
751 | 0 | // perform the opposite conversion: tPOP_RET to tPOP. |
752 | 0 | MachineInstrBuilder MIB = |
753 | 0 | BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) |
754 | 0 | .add(predOps(ARMCC::AL)); |
755 | 0 | bool Popped = false; |
756 | 0 | for (auto MO: MBBI->operands()) |
757 | 0 | if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && |
758 | 0 | MO.getReg() != ARM::PC) { |
759 | 0 | MIB.add(MO); |
760 | 0 | if (!MO.isImplicit()) |
761 | 0 | Popped = true; |
762 | 0 | } |
763 | 0 | // Is there anything left to pop? |
764 | 0 | if (!Popped) |
765 | 0 | MBB.erase(MIB.getInstr()); |
766 | 0 | // Erase the old instruction. |
767 | 0 | MBB.erase(MBBI); |
768 | 0 | MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) |
769 | 0 | .add(predOps(ARMCC::AL)); |
770 | 0 | } |
771 | 201 | |
772 | 201 | assert(PopReg && "Do not know how to get LR"); |
773 | 201 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) |
774 | 201 | .add(predOps(ARMCC::AL)) |
775 | 201 | .addReg(PopReg, RegState::Define); |
776 | 201 | |
777 | 201 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, |
778 | 201 | ARM::NoRegister, MachineInstr::NoFlags); |
779 | 201 | |
780 | 201 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
781 | 201 | .addReg(ARM::LR, RegState::Define) |
782 | 201 | .addReg(PopReg, RegState::Kill) |
783 | 201 | .add(predOps(ARMCC::AL)); |
784 | 201 | |
785 | 201 | if (TemporaryReg) |
786 | 0 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
787 | 0 | .addReg(PopReg, RegState::Define) |
788 | 0 | .addReg(TemporaryReg, RegState::Kill) |
789 | 0 | .add(predOps(ARMCC::AL)); |
790 | 201 | |
791 | 201 | return true; |
792 | 201 | } |
793 | | |
794 | | using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>; |
795 | | |
796 | | // Return the first iteraror after CurrentReg which is present in EnabledRegs, |
797 | | // or OrderEnd if no further registers are in that set. This does not advance |
798 | | // the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. |
799 | | static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, |
800 | | const ARMRegSet &EnabledRegs, |
801 | 2.03k | const unsigned *OrderEnd) { |
802 | 9.09k | while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]7.31k ) |
803 | 7.06k | ++CurrentReg; |
804 | 2.03k | return CurrentReg; |
805 | 2.03k | } |
806 | | |
807 | | bool Thumb1FrameLowering:: |
808 | | spillCalleeSavedRegisters(MachineBasicBlock &MBB, |
809 | | MachineBasicBlock::iterator MI, |
810 | | const std::vector<CalleeSavedInfo> &CSI, |
811 | 861 | const TargetRegisterInfo *TRI) const { |
812 | 861 | if (CSI.empty()) |
813 | 0 | return false; |
814 | 861 | |
815 | 861 | DebugLoc DL; |
816 | 861 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
817 | 861 | MachineFunction &MF = *MBB.getParent(); |
818 | 861 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
819 | 861 | MF.getSubtarget().getRegisterInfo()); |
820 | 861 | |
821 | 861 | ARMRegSet LoRegsToSave; // r0-r7, lr |
822 | 861 | ARMRegSet HiRegsToSave; // r8-r11 |
823 | 861 | ARMRegSet CopyRegs; // Registers which can be used after pushing |
824 | 861 | // LoRegs for saving HiRegs. |
825 | 861 | |
826 | 3.50k | for (unsigned i = CSI.size(); i != 0; --i2.64k ) { |
827 | 2.64k | unsigned Reg = CSI[i-1].getReg(); |
828 | 2.64k | |
829 | 2.64k | if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR913 ) { |
830 | 2.58k | LoRegsToSave[Reg] = true; |
831 | 2.58k | } else if (55 ARM::hGPRRegClass.contains(Reg)55 && Reg != ARM::LR55 ) { |
832 | 55 | HiRegsToSave[Reg] = true; |
833 | 55 | } else { |
834 | 0 | llvm_unreachable("callee-saved register of unexpected class"); |
835 | 0 | } |
836 | 2.64k | |
837 | 2.64k | if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR913 ) && |
838 | 2.64k | !MF.getRegInfo().isLiveIn(Reg)2.58k && |
839 | 2.64k | !(2.58k hasFP(MF)2.58k && Reg == RegInfo->getFrameRegister(MF)570 )) |
840 | 2.41k | CopyRegs[Reg] = true; |
841 | 2.64k | } |
842 | 861 | |
843 | 861 | // Unused argument registers can be used for the high register saving. |
844 | 861 | for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) |
845 | 3.44k | if (!MF.getRegInfo().isLiveIn(ArgReg)) |
846 | 2.22k | CopyRegs[ArgReg] = true; |
847 | 861 | |
848 | 861 | // Push the low registers and lr |
849 | 861 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
850 | 861 | if (!LoRegsToSave.none()) { |
851 | 859 | MachineInstrBuilder MIB = |
852 | 859 | BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); |
853 | 4.29k | for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { |
854 | 4.29k | if (LoRegsToSave[Reg]) { |
855 | 2.58k | bool isKill = !MRI.isLiveIn(Reg); |
856 | 2.58k | if (isKill && !MRI.isReserved(Reg)2.58k ) |
857 | 2.38k | MBB.addLiveIn(Reg); |
858 | 2.58k | |
859 | 2.58k | MIB.addReg(Reg, getKillRegState(isKill)); |
860 | 2.58k | } |
861 | 4.29k | } |
862 | 859 | MIB.setMIFlags(MachineInstr::FrameSetup); |
863 | 859 | } |
864 | 861 | |
865 | 861 | // Push the high registers. There are no store instructions that can access |
866 | 861 | // these registers directly, so we have to move them to low registers, and |
867 | 861 | // push them. This might take multiple pushes, as it is possible for there to |
868 | 861 | // be fewer low registers available than high registers which need saving. |
869 | 861 | |
870 | 861 | // These are in reverse order so that in the case where we need to use |
871 | 861 | // multiple PUSH instructions, the order of the registers on the stack still |
872 | 861 | // matches the unwind info. They need to be swicthed back to ascending order |
873 | 861 | // before adding to the PUSH instruction. |
874 | 861 | static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, |
875 | 861 | ARM::R5, ARM::R4, ARM::R3, |
876 | 861 | ARM::R2, ARM::R1, ARM::R0}; |
877 | 861 | static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; |
878 | 861 | |
879 | 861 | const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); |
880 | 861 | const unsigned *AllHighRegsEnd = std::end(AllHighRegs); |
881 | 861 | |
882 | 861 | // Find the first register to save. |
883 | 861 | const unsigned *HiRegToSave = findNextOrderedReg( |
884 | 861 | std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); |
885 | 861 | |
886 | 881 | while (HiRegToSave != AllHighRegsEnd) { |
887 | 20 | // Find the first low register to use. |
888 | 20 | const unsigned *CopyReg = |
889 | 20 | findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); |
890 | 20 | |
891 | 20 | // Create the PUSH, but don't insert it yet (the MOVs need to come first). |
892 | 20 | MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) |
893 | 20 | .add(predOps(ARMCC::AL)) |
894 | 20 | .setMIFlags(MachineInstr::FrameSetup); |
895 | 20 | |
896 | 20 | SmallVector<unsigned, 4> RegsToPush; |
897 | 75 | while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd56 ) { |
898 | 55 | if (HiRegsToSave[*HiRegToSave]) { |
899 | 55 | bool isKill = !MRI.isLiveIn(*HiRegToSave); |
900 | 55 | if (isKill && !MRI.isReserved(*HiRegToSave)) |
901 | 55 | MBB.addLiveIn(*HiRegToSave); |
902 | 55 | |
903 | 55 | // Emit a MOV from the high reg to the low reg. |
904 | 55 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
905 | 55 | .addReg(*CopyReg, RegState::Define) |
906 | 55 | .addReg(*HiRegToSave, getKillRegState(isKill)) |
907 | 55 | .add(predOps(ARMCC::AL)) |
908 | 55 | .setMIFlags(MachineInstr::FrameSetup); |
909 | 55 | |
910 | 55 | // Record the register that must be added to the PUSH. |
911 | 55 | RegsToPush.push_back(*CopyReg); |
912 | 55 | |
913 | 55 | CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); |
914 | 55 | HiRegToSave = |
915 | 55 | findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); |
916 | 55 | } |
917 | 55 | } |
918 | 20 | |
919 | 20 | // Add the low registers to the PUSH, in ascending order. |
920 | 20 | for (unsigned Reg : llvm::reverse(RegsToPush)) |
921 | 55 | PushMIB.addReg(Reg, RegState::Kill); |
922 | 20 | |
923 | 20 | // Insert the PUSH instruction after the MOVs. |
924 | 20 | MBB.insert(MI, PushMIB); |
925 | 20 | } |
926 | 861 | |
927 | 861 | return true; |
928 | 861 | } |
929 | | |
930 | | bool Thumb1FrameLowering:: |
931 | | restoreCalleeSavedRegisters(MachineBasicBlock &MBB, |
932 | | MachineBasicBlock::iterator MI, |
933 | | std::vector<CalleeSavedInfo> &CSI, |
934 | 915 | const TargetRegisterInfo *TRI) const { |
935 | 915 | if (CSI.empty()) |
936 | 0 | return false; |
937 | 915 | |
938 | 915 | MachineFunction &MF = *MBB.getParent(); |
939 | 915 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
940 | 915 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
941 | 915 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
942 | 915 | MF.getSubtarget().getRegisterInfo()); |
943 | 915 | |
944 | 915 | bool isVarArg = AFI->getArgRegsSaveSize() > 0; |
945 | 915 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()909 : DebugLoc()6 ; |
946 | 915 | |
947 | 915 | ARMRegSet LoRegsToRestore; |
948 | 915 | ARMRegSet HiRegsToRestore; |
949 | 915 | // Low registers (r0-r7) which can be used to restore the high registers. |
950 | 915 | ARMRegSet CopyRegs; |
951 | 915 | |
952 | 2.77k | for (CalleeSavedInfo I : CSI) { |
953 | 2.77k | unsigned Reg = I.getReg(); |
954 | 2.77k | |
955 | 2.77k | if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR967 ) { |
956 | 2.72k | LoRegsToRestore[Reg] = true; |
957 | 2.72k | } else if (55 ARM::hGPRRegClass.contains(Reg)55 && Reg != ARM::LR55 ) { |
958 | 55 | HiRegsToRestore[Reg] = true; |
959 | 55 | } else { |
960 | 0 | llvm_unreachable("callee-saved register of unexpected class"); |
961 | 0 | } |
962 | 2.77k | |
963 | 2.77k | // If this is a low register not used as the frame pointer, we may want to |
964 | 2.77k | // use it for restoring the high registers. |
965 | 2.77k | if ((ARM::tGPRRegClass.contains(Reg)) && |
966 | 2.77k | !(1.81k hasFP(MF)1.81k && Reg == RegInfo->getFrameRegister(MF)424 )) |
967 | 1.63k | CopyRegs[Reg] = true; |
968 | 2.77k | } |
969 | 915 | |
970 | 915 | // If this is a return block, we may be able to use some unused return value |
971 | 915 | // registers for restoring the high regs. |
972 | 915 | auto Terminator = MBB.getFirstTerminator(); |
973 | 915 | if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET909 ) { |
974 | 890 | CopyRegs[ARM::R0] = true; |
975 | 890 | CopyRegs[ARM::R1] = true; |
976 | 890 | CopyRegs[ARM::R2] = true; |
977 | 890 | CopyRegs[ARM::R3] = true; |
978 | 890 | for (auto Op : Terminator->implicit_operands()) { |
979 | 642 | if (Op.isReg()) |
980 | 642 | CopyRegs[Op.getReg()] = false; |
981 | 642 | } |
982 | 890 | } |
983 | 915 | |
984 | 915 | static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, |
985 | 915 | ARM::R4, ARM::R5, ARM::R6, ARM::R7}; |
986 | 915 | static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; |
987 | 915 | |
988 | 915 | const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); |
989 | 915 | const unsigned *AllHighRegsEnd = std::end(AllHighRegs); |
990 | 915 | |
991 | 915 | // Find the first register to restore. |
992 | 915 | auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), |
993 | 915 | HiRegsToRestore, AllHighRegsEnd); |
994 | 915 | |
995 | 935 | while (HiRegToRestore != AllHighRegsEnd) { |
996 | 20 | assert(!CopyRegs.none()); |
997 | 20 | // Find the first low register to use. |
998 | 20 | auto CopyReg = |
999 | 20 | findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); |
1000 | 20 | |
1001 | 20 | // Create the POP instruction. |
1002 | 20 | MachineInstrBuilder PopMIB = |
1003 | 20 | BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); |
1004 | 20 | |
1005 | 75 | while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd56 ) { |
1006 | 55 | // Add the low register to the POP. |
1007 | 55 | PopMIB.addReg(*CopyReg, RegState::Define); |
1008 | 55 | |
1009 | 55 | // Create the MOV from low to high register. |
1010 | 55 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
1011 | 55 | .addReg(*HiRegToRestore, RegState::Define) |
1012 | 55 | .addReg(*CopyReg, RegState::Kill) |
1013 | 55 | .add(predOps(ARMCC::AL)); |
1014 | 55 | |
1015 | 55 | CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); |
1016 | 55 | HiRegToRestore = |
1017 | 55 | findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); |
1018 | 55 | } |
1019 | 20 | } |
1020 | 915 | |
1021 | 915 | MachineInstrBuilder MIB = |
1022 | 915 | BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); |
1023 | 915 | |
1024 | 915 | bool NeedsPop = false; |
1025 | 3.69k | for (unsigned i = CSI.size(); i != 0; --i2.77k ) { |
1026 | 2.77k | CalleeSavedInfo &Info = CSI[i-1]; |
1027 | 2.77k | unsigned Reg = Info.getReg(); |
1028 | 2.77k | |
1029 | 2.77k | // High registers (excluding lr) have already been dealt with |
1030 | 2.77k | if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR967 )) |
1031 | 55 | continue; |
1032 | 2.72k | |
1033 | 2.72k | if (Reg == ARM::LR) { |
1034 | 912 | Info.setRestored(false); |
1035 | 912 | if (!MBB.succ_empty() || |
1036 | 912 | MI->getOpcode() == ARM::TCRETURNdi894 || |
1037 | 912 | MI->getOpcode() == ARM::TCRETURNri889 ) |
1038 | 24 | // LR may only be popped into PC, as part of return sequence. |
1039 | 24 | // If this isn't the return sequence, we'll need emitPopSpecialFixUp |
1040 | 24 | // to restore LR the hard way. |
1041 | 24 | // FIXME: if we don't pass any stack arguments it would be actually |
1042 | 24 | // advantageous *and* correct to do the conversion to an ordinary call |
1043 | 24 | // instruction here. |
1044 | 24 | continue; |
1045 | 888 | // Special epilogue for vararg functions. See emitEpilogue |
1046 | 888 | if (isVarArg) |
1047 | 18 | continue; |
1048 | 870 | // ARMv4T requires BX, see emitEpilogue |
1049 | 870 | if (!STI.hasV5TOps()) |
1050 | 175 | continue; |
1051 | 695 | |
1052 | 695 | // Pop LR into PC. |
1053 | 695 | Reg = ARM::PC; |
1054 | 695 | (*MIB).setDesc(TII.get(ARM::tPOP_RET)); |
1055 | 695 | if (MI != MBB.end()) |
1056 | 695 | MIB.copyImplicitOps(*MI); |
1057 | 695 | MI = MBB.erase(MI); |
1058 | 695 | } |
1059 | 2.72k | MIB.addReg(Reg, getDefRegState(true)); |
1060 | 2.50k | NeedsPop = true; |
1061 | 2.50k | } |
1062 | 915 | |
1063 | 915 | // It's illegal to emit pop instruction without operands. |
1064 | 915 | if (NeedsPop) |
1065 | 906 | MBB.insert(MI, &*MIB); |
1066 | 9 | else |
1067 | 9 | MF.DeleteMachineInstr(MIB); |
1068 | 915 | |
1069 | 915 | return true; |
1070 | 915 | } |