/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- HexagonFrameLowering.cpp - Define frame lowering -------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | // |
9 | | //===----------------------------------------------------------------------===// |
10 | | |
11 | | #include "HexagonFrameLowering.h" |
12 | | #include "HexagonBlockRanges.h" |
13 | | #include "HexagonInstrInfo.h" |
14 | | #include "HexagonMachineFunctionInfo.h" |
15 | | #include "HexagonRegisterInfo.h" |
16 | | #include "HexagonSubtarget.h" |
17 | | #include "HexagonTargetMachine.h" |
18 | | #include "MCTargetDesc/HexagonBaseInfo.h" |
19 | | #include "llvm/ADT/BitVector.h" |
20 | | #include "llvm/ADT/DenseMap.h" |
21 | | #include "llvm/ADT/None.h" |
22 | | #include "llvm/ADT/Optional.h" |
23 | | #include "llvm/ADT/PostOrderIterator.h" |
24 | | #include "llvm/ADT/SetVector.h" |
25 | | #include "llvm/ADT/SmallSet.h" |
26 | | #include "llvm/ADT/SmallVector.h" |
27 | | #include "llvm/CodeGen/LivePhysRegs.h" |
28 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
29 | | #include "llvm/CodeGen/MachineDominators.h" |
30 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
31 | | #include "llvm/CodeGen/MachineFunction.h" |
32 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
33 | | #include "llvm/CodeGen/MachineInstr.h" |
34 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | | #include "llvm/CodeGen/MachineOperand.h" |
38 | | #include "llvm/CodeGen/MachinePostDominators.h" |
39 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | | #include "llvm/CodeGen/RegisterScavenging.h" |
41 | | #include "llvm/IR/Attributes.h" |
42 | | #include "llvm/IR/DebugLoc.h" |
43 | | #include "llvm/IR/Function.h" |
44 | | #include "llvm/MC/MCDwarf.h" |
45 | | #include "llvm/MC/MCRegisterInfo.h" |
46 | | #include "llvm/Pass.h" |
47 | | #include "llvm/Support/CodeGen.h" |
48 | | #include "llvm/Support/CommandLine.h" |
49 | | #include "llvm/Support/Compiler.h" |
50 | | #include "llvm/Support/Debug.h" |
51 | | #include "llvm/Support/ErrorHandling.h" |
52 | | #include "llvm/Support/MathExtras.h" |
53 | | #include "llvm/Support/raw_ostream.h" |
54 | | #include "llvm/Target/TargetMachine.h" |
55 | | #include "llvm/Target/TargetOptions.h" |
56 | | #include "llvm/Target/TargetRegisterInfo.h" |
57 | | #include <algorithm> |
58 | | #include <cassert> |
59 | | #include <cstdint> |
60 | | #include <iterator> |
61 | | #include <limits> |
62 | | #include <map> |
63 | | #include <utility> |
64 | | #include <vector> |
65 | | |
66 | | #define DEBUG_TYPE "hexagon-pei" |
67 | | |
68 | | // Hexagon stack frame layout as defined by the ABI: |
69 | | // |
70 | | // Incoming arguments |
71 | | // passed via stack |
72 | | // | |
73 | | // | |
74 | | // SP during function's FP during function's | |
75 | | // +-- runtime (top of stack) runtime (bottom) --+ | |
76 | | // | | | |
77 | | // --++---------------------+------------------+-----------------++-+------- |
78 | | // | parameter area for | variable-size | fixed-size |LR| arg |
79 | | // | called functions | local objects | local objects |FP| |
80 | | // --+----------------------+------------------+-----------------+--+------- |
81 | | // <- size known -> <- size unknown -> <- size known -> |
82 | | // |
83 | | // Low address High address |
84 | | // |
85 | | // <--- stack growth |
86 | | // |
87 | | // |
88 | | // - In any circumstances, the outgoing function arguments are always accessi- |
89 | | // ble using the SP, and the incoming arguments are accessible using the FP. |
90 | | // - If the local objects are not aligned, they can always be accessed using |
91 | | // the FP. |
92 | | // - If there are no variable-sized objects, the local objects can always be |
93 | | // accessed using the SP, regardless whether they are aligned or not. (The |
94 | | // alignment padding will be at the bottom of the stack (highest address), |
95 | | // and so the offset with respect to the SP will be known at the compile- |
96 | | // -time.) |
97 | | // |
98 | | // The only complication occurs if there are both, local aligned objects, and |
99 | | // dynamically allocated (variable-sized) objects. The alignment pad will be |
100 | | // placed between the FP and the local objects, thus preventing the use of the |
101 | | // FP to access the local objects. At the same time, the variable-sized objects |
102 | | // will be between the SP and the local objects, thus introducing an unknown |
103 | | // distance from the SP to the locals. |
104 | | // |
105 | | // To avoid this problem, a new register is created that holds the aligned |
106 | | // address of the bottom of the stack, referred to in the sources as AP (aligned |
107 | | // pointer). The AP will be equal to "FP-p", where "p" is the smallest pad |
108 | | // that aligns AP to the required boundary (a maximum of the alignments of |
109 | | // all stack objects, fixed- and variable-sized). All local objects[1] will |
110 | | // then use AP as the base pointer. |
111 | | // [1] The exception is with "fixed" stack objects. "Fixed" stack objects get |
112 | | // their name from being allocated at fixed locations on the stack, relative |
113 | | // to the FP. In the presence of dynamic allocation and local alignment, such |
114 | | // objects can only be accessed through the FP. |
115 | | // |
116 | | // Illustration of the AP: |
117 | | // FP --+ |
118 | | // | |
119 | | // ---------------+---------------------+-----+-----------------------++-+-- |
120 | | // Rest of the | Local stack objects | Pad | Fixed stack objects |LR| |
121 | | // stack frame | (aligned) | | (CSR, spills, etc.) |FP| |
122 | | // ---------------+---------------------+-----+-----------------+-----+--+-- |
123 | | // |<-- Multiple of the -->| |
124 | | // stack alignment +-- AP |
125 | | // |
126 | | // The AP is set up at the beginning of the function. Since it is not a dedi- |
127 | | // cated (reserved) register, it needs to be kept live throughout the function |
128 | | // to be available as the base register for local object accesses. |
129 | | // Normally, the address of a stack object is obtained by a pseudo-instruction |
130 | | // PS_fi. To access local objects with the AP register present, a different |
131 | | // pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra |
132 | | // argument compared to PS_fi: the first input register is the AP register. |
133 | | // This keeps the register live between its definition and its uses. |
134 | | |
135 | | // The AP register is originally set up using pseudo-instruction PS_aligna: |
136 | | // AP = PS_aligna A |
137 | | // where |
138 | | // A - required stack alignment |
139 | | // The alignment value must be the maximum of all alignments required by |
140 | | // any stack object. |
141 | | |
142 | | // The dynamic allocation uses a pseudo-instruction PS_alloca: |
143 | | // Rd = PS_alloca Rs, A |
144 | | // where |
145 | | // Rd - address of the allocated space |
146 | | // Rs - minimum size (the actual allocated can be larger to accommodate |
147 | | // alignment) |
148 | | // A - required alignment |
149 | | |
150 | | using namespace llvm; |
151 | | |
152 | | static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", |
153 | | cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); |
154 | | |
155 | | static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots", |
156 | | cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), |
157 | | cl::ZeroOrMore); |
158 | | |
159 | | static cl::opt<int> SpillFuncThreshold("spill-func-threshold", |
160 | | cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), |
161 | | cl::init(6), cl::ZeroOrMore); |
162 | | |
163 | | static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os", |
164 | | cl::Hidden, cl::desc("Specify Os spill func threshold"), |
165 | | cl::init(1), cl::ZeroOrMore); |
166 | | |
167 | | static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer", |
168 | | cl::Hidden, cl::desc("Enable runtime checks for stack overflow."), |
169 | | cl::init(false), cl::ZeroOrMore); |
170 | | |
171 | | static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", |
172 | | cl::init(true), cl::Hidden, cl::ZeroOrMore, |
173 | | cl::desc("Enable stack frame shrink wrapping")); |
174 | | |
175 | | static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", |
176 | | cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore, |
177 | | cl::desc("Max count of stack frame shrink-wraps")); |
178 | | |
179 | | static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long", |
180 | | cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), |
181 | | cl::init(false), cl::ZeroOrMore); |
182 | | |
183 | | static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true), |
184 | | cl::Hidden, cl::desc("Refrain from using FP whenever possible")); |
185 | | |
186 | | static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, |
187 | | cl::init(true), cl::desc("Optimize spill slots")); |
188 | | |
189 | | #ifndef NDEBUG |
190 | | static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden, |
191 | | cl::init(std::numeric_limits<unsigned>::max())); |
192 | | static unsigned SpillOptCount = 0; |
193 | | #endif |
194 | | |
195 | | namespace llvm { |
196 | | |
197 | | void initializeHexagonCallFrameInformationPass(PassRegistry&); |
198 | | FunctionPass *createHexagonCallFrameInformation(); |
199 | | |
200 | | } // end namespace llvm |
201 | | |
202 | | namespace { |
203 | | |
204 | | class HexagonCallFrameInformation : public MachineFunctionPass { |
205 | | public: |
206 | | static char ID; |
207 | | |
208 | 441 | HexagonCallFrameInformation() : MachineFunctionPass(ID) { |
209 | 441 | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
210 | 441 | initializeHexagonCallFrameInformationPass(PR); |
211 | 441 | } |
212 | | |
213 | | bool runOnMachineFunction(MachineFunction &MF) override; |
214 | | |
215 | 440 | MachineFunctionProperties getRequiredProperties() const override { |
216 | 440 | return MachineFunctionProperties().set( |
217 | 440 | MachineFunctionProperties::Property::NoVRegs); |
218 | 440 | } |
219 | | }; |
220 | | |
221 | | char HexagonCallFrameInformation::ID = 0; |
222 | | |
223 | | } // end anonymous namespace |
224 | | |
225 | 2.41k | bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { |
226 | 2.41k | auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); |
227 | 2.41k | bool NeedCFI = MF.getMMI().hasDebugInfo() || |
228 | 2.40k | MF.getFunction()->needsUnwindTableEntry(); |
229 | 2.41k | |
230 | 2.41k | if (!NeedCFI) |
231 | 754 | return false; |
232 | 1.65k | HFI.insertCFIInstructions(MF); |
233 | 1.65k | return true; |
234 | 1.65k | } |
235 | | |
236 | | INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", |
237 | | "Hexagon call frame information", false, false) |
238 | | |
239 | 441 | FunctionPass *llvm::createHexagonCallFrameInformation() { |
240 | 441 | return new HexagonCallFrameInformation(); |
241 | 441 | } |
242 | | |
243 | | /// Map a register pair Reg to the subregister that has the greater "number", |
244 | | /// i.e. D3 (aka R7:6) will be mapped to R7, etc. |
245 | | static unsigned getMax32BitSubRegister(unsigned Reg, |
246 | | const TargetRegisterInfo &TRI, |
247 | 47 | bool hireg = true) { |
248 | 47 | if (Reg < Hexagon::D0 || 47 Reg > Hexagon::D1547 ) |
249 | 0 | return Reg; |
250 | 47 | |
251 | 47 | unsigned RegNo = 0; |
252 | 141 | for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid()141 ; ++SubRegs94 ) { |
253 | 94 | if (hireg94 ) { |
254 | 94 | if (*SubRegs > RegNo) |
255 | 94 | RegNo = *SubRegs; |
256 | 0 | } else { |
257 | 0 | if (!RegNo || 0 *SubRegs < RegNo0 ) |
258 | 0 | RegNo = *SubRegs; |
259 | 0 | } |
260 | 94 | } |
261 | 47 | return RegNo; |
262 | 47 | } |
263 | | |
264 | | /// Returns the callee saved register with the largest id in the vector. |
265 | | static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, |
266 | 13 | const TargetRegisterInfo &TRI) { |
267 | 13 | static_assert(Hexagon::R1 > 0, |
268 | 13 | "Assume physical registers are encoded as positive integers"); |
269 | 13 | if (CSI.empty()) |
270 | 0 | return 0; |
271 | 13 | |
272 | 13 | unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI); |
273 | 47 | for (unsigned I = 1, E = CSI.size(); I < E47 ; ++I34 ) { |
274 | 34 | unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI); |
275 | 34 | if (Reg > Max) |
276 | 34 | Max = Reg; |
277 | 34 | } |
278 | 13 | return Max; |
279 | 13 | } |
280 | | |
281 | | /// Checks if the basic block contains any instruction that needs a stack |
282 | | /// frame to be already in place. |
283 | | static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, |
284 | 2.52k | const HexagonRegisterInfo &HRI) { |
285 | 9.36k | for (auto &I : MBB) { |
286 | 9.36k | const MachineInstr *MI = &I; |
287 | 9.36k | if (MI->isCall()) |
288 | 85 | return true; |
289 | 9.27k | unsigned Opc = MI->getOpcode(); |
290 | 9.27k | switch (Opc) { |
291 | 2 | case Hexagon::PS_alloca: |
292 | 2 | case Hexagon::PS_aligna: |
293 | 2 | return true; |
294 | 9.27k | default: |
295 | 9.27k | break; |
296 | 9.27k | } |
297 | 9.27k | // Check individual operands. |
298 | 9.27k | for (const MachineOperand &MO : MI->operands()) 9.27k { |
299 | 21.4k | // While the presence of a frame index does not prove that a stack |
300 | 21.4k | // frame will be required, all frame indexes should be within alloc- |
301 | 21.4k | // frame/deallocframe. Otherwise, the code that translates a frame |
302 | 21.4k | // index into an offset would have to be aware of the placement of |
303 | 21.4k | // the frame creation/destruction instructions. |
304 | 21.4k | if (MO.isFI()) |
305 | 1.64k | return true; |
306 | 19.7k | if (19.7k MO.isReg()19.7k ) { |
307 | 17.7k | unsigned R = MO.getReg(); |
308 | 17.7k | // Virtual registers will need scavenging, which then may require |
309 | 17.7k | // a stack slot. |
310 | 17.7k | if (TargetRegisterInfo::isVirtualRegister(R)) |
311 | 1 | return true; |
312 | 44.8k | for (MCSubRegIterator S(R, &HRI, true); 17.7k S.isValid()44.8k ; ++S27.0k ) |
313 | 27.1k | if (27.1k CSR[*S]27.1k ) |
314 | 74 | return true; |
315 | 17.7k | continue; |
316 | 1.98k | } |
317 | 1.98k | if (1.98k MO.isRegMask()1.98k ) { |
318 | 0 | // A regmask would normally have all callee-saved registers marked |
319 | 0 | // as preserved, so this check would not be needed, but in case of |
320 | 0 | // ever having other regmasks (for other calling conventions), |
321 | 0 | // make sure they would be processed correctly. |
322 | 0 | const uint32_t *BM = MO.getRegMask(); |
323 | 0 | for (int x = CSR.find_first(); x >= 00 ; x = CSR.find_next(x)0 ) { |
324 | 0 | unsigned R = x; |
325 | 0 | // If this regmask does not preserve a CSR, a frame will be needed. |
326 | 0 | if (!(BM[R/32] & (1u << (R%32)))) |
327 | 0 | return true; |
328 | 0 | } |
329 | 0 | } |
330 | 21.4k | } |
331 | 9.36k | } |
332 | 711 | return false; |
333 | 2.52k | } |
334 | | |
335 | | /// Returns true if MBB has a machine instructions that indicates a tail call |
336 | | /// in the block. |
337 | 8 | static bool hasTailCall(const MachineBasicBlock &MBB) { |
338 | 8 | MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); |
339 | 8 | unsigned RetOpc = I->getOpcode(); |
340 | 7 | return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r; |
341 | 8 | } |
342 | | |
343 | | /// Returns true if MBB contains an instruction that returns. |
344 | 7 | static bool hasReturn(const MachineBasicBlock &MBB) { |
345 | 7 | for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E7 ; ++I0 ) |
346 | 7 | if (7 I->isReturn()7 ) |
347 | 7 | return true; |
348 | 0 | return false; |
349 | 7 | } |
350 | | |
351 | | /// Returns the "return" instruction from this block, or nullptr if there |
352 | | /// isn't any. |
353 | 3.31k | static MachineInstr *getReturn(MachineBasicBlock &MBB) { |
354 | 3.31k | for (auto &I : MBB) |
355 | 21.9k | if (21.9k I.isReturn()21.9k ) |
356 | 2.35k | return &I; |
357 | 957 | return nullptr; |
358 | 957 | } |
359 | | |
360 | 2.39k | static bool isRestoreCall(unsigned Opc) { |
361 | 2.39k | switch (Opc) { |
362 | 7 | case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: |
363 | 7 | case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: |
364 | 7 | case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT: |
365 | 7 | case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC: |
366 | 7 | case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT: |
367 | 7 | case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC: |
368 | 7 | case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4: |
369 | 7 | case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC: |
370 | 7 | return true; |
371 | 2.39k | } |
372 | 2.39k | return false; |
373 | 2.39k | } |
374 | | |
375 | 2.40k | static inline bool isOptNone(const MachineFunction &MF) { |
376 | 2.40k | return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) || |
377 | 2.40k | MF.getTarget().getOptLevel() == CodeGenOpt::None; |
378 | 2.40k | } |
379 | | |
380 | 125 | static inline bool isOptSize(const MachineFunction &MF) { |
381 | 125 | const Function &F = *MF.getFunction(); |
382 | 34 | return F.optForSize() && !F.optForMinSize(); |
383 | 125 | } |
384 | | |
385 | 116 | static inline bool isMinSize(const MachineFunction &MF) { |
386 | 116 | return MF.getFunction()->optForMinSize(); |
387 | 116 | } |
388 | | |
389 | | /// Implements shrink-wrapping of the stack frame. By default, stack frame |
390 | | /// is created in the function entry block, and is cleaned up in every block |
391 | | /// that returns. This function finds alternate blocks: one for the frame |
392 | | /// setup (prolog) and one for the cleanup (epilog). |
393 | | void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, |
394 | 2.40k | MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { |
395 | 2.40k | static unsigned ShrinkCounter = 0; |
396 | 2.40k | |
397 | 2.40k | if (ShrinkLimit.getPosition()2.40k ) { |
398 | 0 | if (ShrinkCounter >= ShrinkLimit) |
399 | 0 | return; |
400 | 0 | ShrinkCounter++; |
401 | 0 | } |
402 | 2.40k | |
403 | 2.40k | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
404 | 2.40k | |
405 | 2.40k | MachineDominatorTree MDT; |
406 | 2.40k | MDT.runOnMachineFunction(MF); |
407 | 2.40k | MachinePostDominatorTree MPT; |
408 | 2.40k | MPT.runOnMachineFunction(MF); |
409 | 2.40k | |
410 | 2.40k | using UnsignedMap = DenseMap<unsigned, unsigned>; |
411 | 2.40k | using RPOTType = ReversePostOrderTraversal<const MachineFunction *>; |
412 | 2.40k | |
413 | 2.40k | UnsignedMap RPO; |
414 | 2.40k | RPOTType RPOT(&MF); |
415 | 2.40k | unsigned RPON = 0; |
416 | 6.02k | for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E6.02k ; ++I3.61k ) |
417 | 3.61k | RPO[(*I)->getNumber()] = RPON++; |
418 | 2.40k | |
419 | 2.40k | // Don't process functions that have loops, at least for now. Placement |
420 | 2.40k | // of prolog and epilog must take loop structure into account. For simpli- |
421 | 2.40k | // city don't do it right now. |
422 | 3.20k | for (auto &I : MF) { |
423 | 3.20k | unsigned BN = RPO[I.getNumber()]; |
424 | 4.34k | for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE4.34k ; ++SI1.13k ) { |
425 | 1.29k | // If found a back-edge, return. |
426 | 1.29k | if (RPO[(*SI)->getNumber()] <= BN) |
427 | 152 | return; |
428 | 1.29k | } |
429 | 3.20k | } |
430 | 2.40k | |
431 | 2.40k | // Collect the set of blocks that need a stack frame to execute. Scan |
432 | 2.40k | // each block for uses/defs of callee-saved registers, calls, etc. |
433 | 2.25k | SmallVector<MachineBasicBlock*,16> SFBlocks; |
434 | 2.25k | BitVector CSR(Hexagon::NUM_TARGET_REGS); |
435 | 29.3k | for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P29.3k ; ++P27.0k ) |
436 | 54.1k | for (MCSubRegIterator S(*P, &HRI, true); 27.0k S.isValid()54.1k ; ++S27.0k ) |
437 | 27.0k | CSR[*S] = true; |
438 | 2.25k | |
439 | 2.25k | for (auto &I : MF) |
440 | 2.52k | if (2.52k needsStackFrame(I, CSR, HRI)2.52k ) |
441 | 1.80k | SFBlocks.push_back(&I); |
442 | 2.25k | |
443 | 2.25k | DEBUG({ |
444 | 2.25k | dbgs() << "Blocks needing SF: {"; |
445 | 2.25k | for (auto &B : SFBlocks) |
446 | 2.25k | dbgs() << " BB#" << B->getNumber(); |
447 | 2.25k | dbgs() << " }\n"; |
448 | 2.25k | }); |
449 | 2.25k | // No frame needed? |
450 | 2.25k | if (SFBlocks.empty()) |
451 | 540 | return; |
452 | 1.71k | |
453 | 1.71k | // Pick a common dominator and a common post-dominator. |
454 | 1.71k | MachineBasicBlock *DomB = SFBlocks[0]; |
455 | 1.80k | for (unsigned i = 1, n = SFBlocks.size(); i < n1.80k ; ++i93 ) { |
456 | 93 | DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); |
457 | 93 | if (!DomB) |
458 | 0 | break; |
459 | 93 | } |
460 | 1.71k | MachineBasicBlock *PDomB = SFBlocks[0]; |
461 | 1.77k | for (unsigned i = 1, n = SFBlocks.size(); i < n1.77k ; ++i62 ) { |
462 | 72 | PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); |
463 | 72 | if (!PDomB) |
464 | 10 | break; |
465 | 72 | } |
466 | 1.71k | DEBUG({ |
467 | 1.71k | dbgs() << "Computed dom block: BB#"; |
468 | 1.71k | if (DomB) dbgs() << DomB->getNumber(); |
469 | 1.71k | else dbgs() << "<null>"; |
470 | 1.71k | dbgs() << ", computed pdom block: BB#"; |
471 | 1.71k | if (PDomB) dbgs() << PDomB->getNumber(); |
472 | 1.71k | else dbgs() << "<null>"; |
473 | 1.71k | dbgs() << "\n"; |
474 | 1.71k | }); |
475 | 1.71k | if (!DomB || 1.71k !PDomB1.71k ) |
476 | 10 | return; |
477 | 1.70k | |
478 | 1.70k | // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. |
479 | 1.70k | if (1.70k !MDT.dominates(DomB, PDomB)1.70k ) { |
480 | 0 | DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); |
481 | 0 | return; |
482 | 0 | } |
483 | 1.70k | if (1.70k !MPT.dominates(PDomB, DomB)1.70k ) { |
484 | 0 | DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); |
485 | 0 | return; |
486 | 0 | } |
487 | 1.70k | |
488 | 1.70k | // Finally, everything seems right. |
489 | 1.70k | PrologB = DomB; |
490 | 1.70k | EpilogB = PDomB; |
491 | 1.70k | } |
492 | | |
493 | | /// Perform most of the PEI work here: |
494 | | /// - saving/restoring of the callee-saved registers, |
495 | | /// - stack frame creation and destruction. |
496 | | /// Normally, this work is distributed among various functions, but doing it |
497 | | /// in one place allows shrink-wrapping of the stack frame. |
498 | | void HexagonFrameLowering::emitPrologue(MachineFunction &MF, |
499 | 2.40k | MachineBasicBlock &MBB) const { |
500 | 2.40k | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
501 | 2.40k | |
502 | 2.40k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
503 | 2.40k | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
504 | 2.40k | |
505 | 2.40k | MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; |
506 | 2.40k | if (EnableShrinkWrapping) |
507 | 2.40k | findShrunkPrologEpilog(MF, PrologB, EpilogB); |
508 | 2.40k | |
509 | 2.40k | bool PrologueStubs = false; |
510 | 2.40k | insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs); |
511 | 2.40k | insertPrologueInBlock(*PrologB, PrologueStubs); |
512 | 2.40k | updateEntryPaths(MF, *PrologB); |
513 | 2.40k | |
514 | 2.40k | if (EpilogB2.40k ) { |
515 | 1.70k | insertCSRRestoresInBlock(*EpilogB, CSI, HRI); |
516 | 1.70k | insertEpilogueInBlock(*EpilogB); |
517 | 2.40k | } else { |
518 | 702 | for (auto &B : MF) |
519 | 1.78k | if (1.78k B.isReturnBlock()1.78k ) |
520 | 701 | insertCSRRestoresInBlock(B, CSI, HRI); |
521 | 702 | |
522 | 702 | for (auto &B : MF) |
523 | 1.78k | if (1.78k B.isReturnBlock()1.78k ) |
524 | 701 | insertEpilogueInBlock(B); |
525 | 702 | |
526 | 1.78k | for (auto &B : MF) { |
527 | 1.78k | if (B.empty()) |
528 | 139 | continue; |
529 | 1.64k | MachineInstr *RetI = getReturn(B); |
530 | 1.64k | if (!RetI || 1.64k isRestoreCall(RetI->getOpcode())701 ) |
531 | 944 | continue; |
532 | 701 | for (auto &R : CSI) |
533 | 48 | RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); |
534 | 1.78k | } |
535 | 702 | } |
536 | 2.40k | |
537 | 2.40k | if (EpilogB2.40k ) { |
538 | 1.70k | // If there is an epilog block, it may not have a return instruction. |
539 | 1.70k | // In such case, we need to add the callee-saved registers as live-ins |
540 | 1.70k | // in all blocks on all paths from the epilog to any return block. |
541 | 1.70k | unsigned MaxBN = MF.getNumBlockIDs(); |
542 | 1.70k | BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1); |
543 | 1.70k | updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path); |
544 | 1.70k | } |
545 | 2.40k | } |
546 | | |
547 | | void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, |
548 | 2.40k | bool PrologueStubs) const { |
549 | 2.40k | MachineFunction &MF = *MBB.getParent(); |
550 | 2.40k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
551 | 2.40k | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
552 | 2.40k | auto &HII = *HST.getInstrInfo(); |
553 | 2.40k | auto &HRI = *HST.getRegisterInfo(); |
554 | 2.40k | |
555 | 2.40k | unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment()); |
556 | 2.40k | |
557 | 2.40k | // Calculate the total stack frame size. |
558 | 2.40k | // Get the number of bytes to allocate from the FrameInfo. |
559 | 2.40k | unsigned FrameSize = MFI.getStackSize(); |
560 | 2.40k | // Round up the max call frame size to the max alignment on the stack. |
561 | 2.40k | unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign); |
562 | 2.40k | MFI.setMaxCallFrameSize(MaxCFA); |
563 | 2.40k | |
564 | 2.40k | FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign); |
565 | 2.40k | MFI.setStackSize(FrameSize); |
566 | 2.40k | |
567 | 2.40k | bool AlignStack = (MaxAlign > getStackAlignment()); |
568 | 2.40k | |
569 | 2.40k | // Get the number of bytes to allocate from the FrameInfo. |
570 | 2.40k | unsigned NumBytes = MFI.getStackSize(); |
571 | 2.40k | unsigned SP = HRI.getStackRegister(); |
572 | 2.40k | unsigned MaxCF = MFI.getMaxCallFrameSize(); |
573 | 2.40k | MachineBasicBlock::iterator InsertPt = MBB.begin(); |
574 | 2.40k | |
575 | 2.40k | SmallVector<MachineInstr *, 4> AdjustRegs; |
576 | 2.40k | for (auto &MBB : MF) |
577 | 3.61k | for (auto &MI : MBB) |
578 | 21.1k | if (21.1k MI.getOpcode() == Hexagon::PS_alloca21.1k ) |
579 | 3 | AdjustRegs.push_back(&MI); |
580 | 2.40k | |
581 | 3 | for (auto MI : AdjustRegs) { |
582 | 3 | assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca"); |
583 | 3 | expandAlloca(MI, HII, SP, MaxCF); |
584 | 3 | MI->eraseFromParent(); |
585 | 3 | } |
586 | 2.40k | |
587 | 2.40k | DebugLoc dl = MBB.findDebugLoc(InsertPt); |
588 | 2.40k | |
589 | 2.40k | if (hasFP(MF)2.40k ) { |
590 | 1.66k | insertAllocframe(MBB, InsertPt, NumBytes); |
591 | 1.66k | if (AlignStack1.66k ) { |
592 | 13 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) |
593 | 13 | .addReg(SP) |
594 | 13 | .addImm(-int64_t(MaxAlign)); |
595 | 13 | } |
596 | 1.66k | // If the stack-checking is enabled, and we spilled the callee-saved |
597 | 1.66k | // registers inline (i.e. did not use a spill function), then call |
598 | 1.66k | // the stack checker directly. |
599 | 1.66k | if (EnableStackOVFSanitizer && 1.66k !PrologueStubs2 ) |
600 | 1 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) |
601 | 1 | .addExternalSymbol("__runtime_stack_check"); |
602 | 2.40k | } else if (739 NumBytes > 0739 ) { |
603 | 63 | assert(alignTo(NumBytes, 8) == NumBytes); |
604 | 63 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) |
605 | 63 | .addReg(SP) |
606 | 63 | .addImm(-int(NumBytes)); |
607 | 63 | } |
608 | 2.40k | } |
609 | | |
610 | 2.40k | void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { |
611 | 2.40k | MachineFunction &MF = *MBB.getParent(); |
612 | 2.40k | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
613 | 2.40k | auto &HII = *HST.getInstrInfo(); |
614 | 2.40k | auto &HRI = *HST.getRegisterInfo(); |
615 | 2.40k | unsigned SP = HRI.getStackRegister(); |
616 | 2.40k | |
617 | 2.40k | MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); |
618 | 2.40k | DebugLoc dl = MBB.findDebugLoc(InsertPt); |
619 | 2.40k | |
620 | 2.40k | if (!hasFP(MF)2.40k ) { |
621 | 740 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
622 | 740 | if (unsigned NumBytes740 = MFI.getStackSize()) { |
623 | 62 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) |
624 | 62 | .addReg(SP) |
625 | 62 | .addImm(NumBytes); |
626 | 62 | } |
627 | 740 | return; |
628 | 740 | } |
629 | 1.66k | |
630 | 1.66k | MachineInstr *RetI = getReturn(MBB); |
631 | 1.66k | unsigned RetOpc = RetI ? RetI->getOpcode()1.65k : 013 ; |
632 | 1.66k | |
633 | 1.66k | // Handle EH_RETURN. |
634 | 1.66k | if (RetOpc == Hexagon::EH_RETURN_JMPR1.66k ) { |
635 | 1 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); |
636 | 1 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP) |
637 | 1 | .addReg(SP) |
638 | 1 | .addReg(Hexagon::R28); |
639 | 1 | return; |
640 | 1 | } |
641 | 1.66k | |
642 | 1.66k | // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- |
643 | 1.66k | // frame instruction if we encounter it. |
644 | 1.66k | if (1.66k RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 || |
645 | 1.66k | RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC || |
646 | 1.66k | RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT || |
647 | 1.66k | RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC1.65k ) { |
648 | 7 | MachineBasicBlock::iterator It = RetI; |
649 | 7 | ++It; |
650 | 7 | // Delete all instructions after the RESTORE (except labels). |
651 | 14 | while (It != MBB.end()14 ) { |
652 | 7 | if (!It->isLabel()) |
653 | 7 | It = MBB.erase(It); |
654 | 7 | else |
655 | 0 | ++It; |
656 | 7 | } |
657 | 7 | return; |
658 | 7 | } |
659 | 1.65k | |
660 | 1.65k | // It is possible that the restoring code is a call to a library function. |
661 | 1.65k | // All of the restore* functions include "deallocframe", so we need to make |
662 | 1.65k | // sure that we don't add an extra one. |
663 | 1.65k | bool NeedsDeallocframe = true; |
664 | 1.65k | if (!MBB.empty() && 1.65k InsertPt != MBB.begin()1.65k ) { |
665 | 1.65k | MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); |
666 | 1.65k | unsigned COpc = PrevIt->getOpcode(); |
667 | 1.65k | if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 || |
668 | 1.65k | COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC || |
669 | 1.65k | COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT || |
670 | 1.65k | COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC || |
671 | 1.65k | COpc == Hexagon::PS_call_nr1.65k || COpc == Hexagon::PS_callr_nr1.64k ) |
672 | 5 | NeedsDeallocframe = false; |
673 | 1.65k | } |
674 | 1.65k | |
675 | 1.65k | if (!NeedsDeallocframe) |
676 | 5 | return; |
677 | 1.65k | // If the returning instruction is PS_jmpret, replace it with dealloc_return, |
678 | 1.65k | // otherwise just add deallocframe. The function could be returning via a |
679 | 1.65k | // tail call. |
680 | 1.65k | if (1.65k RetOpc != Hexagon::PS_jmpret || 1.65k DisableDeallocRet1.64k ) { |
681 | 11 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); |
682 | 11 | return; |
683 | 11 | } |
684 | 1.64k | unsigned NewOpc = Hexagon::L4_return; |
685 | 1.64k | MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc)); |
686 | 1.64k | // Transfer the function live-out registers. |
687 | 1.64k | NewI->copyImplicitOps(MF, *RetI); |
688 | 1.64k | MBB.erase(RetI); |
689 | 1.64k | } |
690 | | |
691 | | void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB, |
692 | 1.66k | MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const { |
693 | 1.66k | MachineFunction &MF = *MBB.getParent(); |
694 | 1.66k | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
695 | 1.66k | auto &HII = *HST.getInstrInfo(); |
696 | 1.66k | auto &HRI = *HST.getRegisterInfo(); |
697 | 1.66k | |
698 | 1.66k | // Check for overflow. |
699 | 1.66k | // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? |
700 | 1.66k | const unsigned int ALLOCFRAME_MAX = 16384; |
701 | 1.66k | |
702 | 1.66k | // Create a dummy memory operand to prevent allocframe from being treated as |
703 | 1.66k | // a volatile memory reference. |
704 | 1.66k | auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0), |
705 | 1.66k | MachineMemOperand::MOStore, 4, 4); |
706 | 1.66k | |
707 | 1.66k | DebugLoc dl = MBB.findDebugLoc(InsertPt); |
708 | 1.66k | |
709 | 1.66k | if (NumBytes >= ALLOCFRAME_MAX1.66k ) { |
710 | 0 | // Emit allocframe(#0). |
711 | 0 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) |
712 | 0 | .addImm(0) |
713 | 0 | .addMemOperand(MMO); |
714 | 0 | |
715 | 0 | // Subtract the size from the stack pointer. |
716 | 0 | unsigned SP = HRI.getStackRegister(); |
717 | 0 | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) |
718 | 0 | .addReg(SP) |
719 | 0 | .addImm(-int(NumBytes)); |
720 | 1.66k | } else { |
721 | 1.66k | BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) |
722 | 1.66k | .addImm(NumBytes) |
723 | 1.66k | .addMemOperand(MMO); |
724 | 1.66k | } |
725 | 1.66k | } |
726 | | |
727 | | void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF, |
728 | 2.40k | MachineBasicBlock &SaveB) const { |
729 | 2.40k | SetVector<unsigned> Worklist; |
730 | 2.40k | |
731 | 2.40k | MachineBasicBlock &EntryB = MF.front(); |
732 | 2.40k | Worklist.insert(EntryB.getNumber()); |
733 | 2.40k | |
734 | 2.40k | unsigned SaveN = SaveB.getNumber(); |
735 | 2.40k | auto &CSI = MF.getFrameInfo().getCalleeSavedInfo(); |
736 | 2.40k | |
737 | 4.84k | for (unsigned i = 0; i < Worklist.size()4.84k ; ++i2.43k ) { |
738 | 2.43k | unsigned BN = Worklist[i]; |
739 | 2.43k | MachineBasicBlock &MBB = *MF.getBlockNumbered(BN); |
740 | 2.43k | for (auto &R : CSI) |
741 | 126 | if (126 !MBB.isLiveIn(R.getReg())126 ) |
742 | 5 | MBB.addLiveIn(R.getReg()); |
743 | 2.43k | if (BN != SaveN) |
744 | 31 | for (auto &SB : MBB.successors()) |
745 | 38 | Worklist.insert(SB->getNumber()); |
746 | 2.43k | } |
747 | 2.40k | } |
748 | | |
749 | | bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, |
750 | | MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF, |
751 | 1.72k | BitVector &Path) const { |
752 | 1.72k | assert(MBB.getNumber() >= 0); |
753 | 1.72k | unsigned BN = MBB.getNumber(); |
754 | 1.72k | if (Path[BN] || 1.72k DoneF[BN]1.72k ) |
755 | 0 | return false; |
756 | 1.72k | if (1.72k DoneT[BN]1.72k ) |
757 | 7 | return true; |
758 | 1.72k | |
759 | 1.72k | auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo(); |
760 | 1.72k | |
761 | 1.72k | Path[BN] = true; |
762 | 1.72k | bool ReachedExit = false; |
763 | 1.72k | for (auto &SB : MBB.successors()) |
764 | 21 | ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path); |
765 | 1.72k | |
766 | 1.72k | if (!MBB.empty() && 1.72k MBB.back().isReturn()1.71k ) { |
767 | 1.69k | // Add implicit uses of all callee-saved registers to the reached |
768 | 1.69k | // return instructions. This is to prevent the anti-dependency breaker |
769 | 1.69k | // from renaming these registers. |
770 | 1.69k | MachineInstr &RetI = MBB.back(); |
771 | 1.69k | if (!isRestoreCall(RetI.getOpcode())) |
772 | 1.69k | for (auto &R : CSI) |
773 | 47 | RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); |
774 | 1.69k | ReachedExit = true; |
775 | 1.69k | } |
776 | 1.72k | |
777 | 1.72k | // We don't want to add unnecessary live-ins to the restore block: since |
778 | 1.72k | // the callee-saved registers are being defined in it, the entry of the |
779 | 1.72k | // restore block cannot be on the path from the definitions to any exit. |
780 | 1.72k | if (ReachedExit && 1.72k &MBB != &RestoreB1.70k ) { |
781 | 12 | for (auto &R : CSI) |
782 | 1 | if (1 !MBB.isLiveIn(R.getReg())1 ) |
783 | 1 | MBB.addLiveIn(R.getReg()); |
784 | 12 | DoneT[BN] = true; |
785 | 12 | } |
786 | 1.72k | if (!ReachedExit) |
787 | 11 | DoneF[BN] = true; |
788 | 1.72k | |
789 | 1.72k | Path[BN] = false; |
790 | 1.72k | return ReachedExit; |
791 | 1.72k | } |
792 | | |
793 | | static Optional<MachineBasicBlock::iterator> |
794 | 1.82k | findCFILocation(MachineBasicBlock &B) { |
795 | 1.82k | // The CFI instructions need to be inserted right after allocframe. |
796 | 1.82k | // An exception to this is a situation where allocframe is bundled |
797 | 1.82k | // with a call: then the CFI instructions need to be inserted before |
798 | 1.82k | // the packet with the allocframe+call (in case the call throws an |
799 | 1.82k | // exception). |
800 | 1.82k | auto End = B.instr_end(); |
801 | 1.82k | |
802 | 2.21k | for (MachineInstr &I : B) { |
803 | 2.21k | MachineBasicBlock::iterator It = I.getIterator(); |
804 | 2.21k | if (!I.isBundle()2.21k ) { |
805 | 1.74k | if (I.getOpcode() == Hexagon::S2_allocframe) |
806 | 1.47k | return std::next(It); |
807 | 271 | continue; |
808 | 271 | } |
809 | 469 | // I is a bundle. |
810 | 469 | bool HasCall = false, HasAllocFrame = false; |
811 | 469 | auto T = It.getInstrIterator(); |
812 | 2.80k | while (++T != End && 2.80k T->isBundled()2.46k ) { |
813 | 2.33k | if (T->getOpcode() == Hexagon::S2_allocframe) |
814 | 28 | HasAllocFrame = true; |
815 | 2.30k | else if (2.30k T->isCall()2.30k ) |
816 | 81 | HasCall = true; |
817 | 2.33k | } |
818 | 469 | if (HasAllocFrame) |
819 | 28 | return HasCall ? 28 It18 : std::next(It)10 ; |
820 | 322 | } |
821 | 322 | return None; |
822 | 322 | } |
823 | | |
824 | 1.65k | void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { |
825 | 1.82k | for (auto &B : MF) { |
826 | 1.82k | auto At = findCFILocation(B); |
827 | 1.82k | if (At.hasValue()) |
828 | 1.49k | insertCFIInstructionsAt(B, At.getValue()); |
829 | 1.82k | } |
830 | 1.65k | } |
831 | | |
832 | | void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, |
833 | 1.49k | MachineBasicBlock::iterator At) const { |
834 | 1.49k | MachineFunction &MF = *MBB.getParent(); |
835 | 1.49k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
836 | 1.49k | MachineModuleInfo &MMI = MF.getMMI(); |
837 | 1.49k | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
838 | 1.49k | auto &HII = *HST.getInstrInfo(); |
839 | 1.49k | auto &HRI = *HST.getRegisterInfo(); |
840 | 1.49k | |
841 | 1.49k | // If CFI instructions have debug information attached, something goes |
842 | 1.49k | // wrong with the final assembly generation: the prolog_end is placed |
843 | 1.49k | // in a wrong location. |
844 | 1.49k | DebugLoc DL; |
845 | 1.49k | const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); |
846 | 1.49k | |
847 | 1.49k | MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); |
848 | 1.49k | bool HasFP = hasFP(MF); |
849 | 1.49k | |
850 | 1.49k | if (HasFP1.49k ) { |
851 | 1.49k | unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); |
852 | 1.49k | unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); |
853 | 1.49k | |
854 | 1.49k | // Define CFA via an offset from the value of FP. |
855 | 1.49k | // |
856 | 1.49k | // -8 -4 0 (SP) |
857 | 1.49k | // --+----+----+--------------------- |
858 | 1.49k | // | FP | LR | increasing addresses --> |
859 | 1.49k | // --+----+----+--------------------- |
860 | 1.49k | // | +-- Old SP (before allocframe) |
861 | 1.49k | // +-- New FP (after allocframe) |
862 | 1.49k | // |
863 | 1.49k | // MCCFIInstruction::createDefCfa subtracts the offset from the register. |
864 | 1.49k | // MCCFIInstruction::createOffset takes the offset without sign change. |
865 | 1.49k | auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); |
866 | 1.49k | BuildMI(MBB, At, DL, CFID) |
867 | 1.49k | .addCFIIndex(MF.addFrameInst(DefCfa)); |
868 | 1.49k | // R31 (return addr) = CFA - 4 |
869 | 1.49k | auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); |
870 | 1.49k | BuildMI(MBB, At, DL, CFID) |
871 | 1.49k | .addCFIIndex(MF.addFrameInst(OffR31)); |
872 | 1.49k | // R30 (frame ptr) = CFA - 8 |
873 | 1.49k | auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); |
874 | 1.49k | BuildMI(MBB, At, DL, CFID) |
875 | 1.49k | .addCFIIndex(MF.addFrameInst(OffR30)); |
876 | 1.49k | } |
877 | 1.49k | |
878 | 1.49k | static unsigned int RegsToMove[] = { |
879 | 1.49k | Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, |
880 | 1.49k | Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, |
881 | 1.49k | Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, |
882 | 1.49k | Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, |
883 | 1.49k | Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, |
884 | 1.49k | Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, |
885 | 1.49k | Hexagon::NoRegister |
886 | 1.49k | }; |
887 | 1.49k | |
888 | 1.49k | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
889 | 1.49k | |
890 | 37.4k | for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister37.4k ; ++i35.9k ) { |
891 | 35.9k | unsigned Reg = RegsToMove[i]; |
892 | 509 | auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { |
893 | 509 | return C.getReg() == Reg; |
894 | 509 | }; |
895 | 35.9k | auto F = find_if(CSI, IfR); |
896 | 35.9k | if (F == CSI.end()) |
897 | 35.9k | continue; |
898 | 22 | |
899 | 22 | int64_t Offset; |
900 | 22 | if (HasFP22 ) { |
901 | 22 | // If the function has a frame pointer (i.e. has an allocframe), |
902 | 22 | // then the CFA has been defined in terms of FP. Any offsets in |
903 | 22 | // the following CFI instructions have to be defined relative |
904 | 22 | // to FP, which points to the bottom of the stack frame. |
905 | 22 | // The function getFrameIndexReference can still choose to use SP |
906 | 22 | // for the offset calculation, so we cannot simply call it here. |
907 | 22 | // Instead, get the offset (relative to the FP) directly. |
908 | 22 | Offset = MFI.getObjectOffset(F->getFrameIdx()); |
909 | 22 | } else { |
910 | 0 | unsigned FrameReg; |
911 | 0 | Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg); |
912 | 0 | } |
913 | 22 | // Subtract 8 to make room for R30 and R31, which are added above. |
914 | 22 | Offset -= 8; |
915 | 22 | |
916 | 22 | if (Reg < Hexagon::D0 || 22 Reg > Hexagon::D1522 ) { |
917 | 0 | unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); |
918 | 0 | auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, |
919 | 0 | Offset); |
920 | 0 | BuildMI(MBB, At, DL, CFID) |
921 | 0 | .addCFIIndex(MF.addFrameInst(OffReg)); |
922 | 22 | } else { |
923 | 22 | // Split the double regs into subregs, and generate appropriate |
924 | 22 | // cfi_offsets. |
925 | 22 | // The only reason we split double regs is that llvm-mc does not |
926 | 22 | // understand paired registers for cfi_offset. |
927 | 22 | // E.g. .cfi_offset r1:0, -64 |
928 | 22 | |
929 | 22 | unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); |
930 | 22 | unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); |
931 | 22 | unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); |
932 | 22 | unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); |
933 | 22 | auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, |
934 | 22 | Offset+4); |
935 | 22 | BuildMI(MBB, At, DL, CFID) |
936 | 22 | .addCFIIndex(MF.addFrameInst(OffHi)); |
937 | 22 | auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, |
938 | 22 | Offset); |
939 | 22 | BuildMI(MBB, At, DL, CFID) |
940 | 22 | .addCFIIndex(MF.addFrameInst(OffLo)); |
941 | 22 | } |
942 | 35.9k | } |
943 | 1.49k | } |
944 | | |
945 | 15.4k | bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { |
946 | 15.4k | if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) |
947 | 0 | return false; |
948 | 15.4k | |
949 | 15.4k | auto &MFI = MF.getFrameInfo(); |
950 | 15.4k | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
951 | 15.4k | bool HasExtraAlign = HRI.needsStackRealignment(MF); |
952 | 15.4k | bool HasAlloca = MFI.hasVarSizedObjects(); |
953 | 15.4k | |
954 | 15.4k | // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think |
955 | 15.4k | // that this shouldn't be required, but doing so now because gcc does and |
956 | 15.4k | // gdb can't break at the start of the function without it. Will remove if |
957 | 15.4k | // this turns out to be a gdb bug. |
958 | 15.4k | // |
959 | 15.4k | if (MF.getTarget().getOptLevel() == CodeGenOpt::None) |
960 | 10.8k | return true; |
961 | 4.61k | |
962 | 4.61k | // By default we want to use SP (since it's always there). FP requires |
963 | 4.61k | // some setup (i.e. ALLOCFRAME). |
964 | 4.61k | // Both alloca and stack alignment modify the stack pointer by an |
965 | 4.61k | // undetermined value, so we need to save it at the entry to the function |
966 | 4.61k | // (i.e. use allocframe). |
967 | 4.61k | if (4.61k HasAlloca || 4.61k HasExtraAlign4.60k ) |
968 | 1.14k | return true; |
969 | 3.47k | |
970 | 3.47k | if (3.47k MFI.getStackSize() > 03.47k ) { |
971 | 1.09k | // If FP-elimination is disabled, we have to use FP at this point. |
972 | 1.09k | const TargetMachine &TM = MF.getTarget(); |
973 | 1.09k | if (TM.Options.DisableFramePointerElim(MF) || 1.09k !EliminateFramePointer803 ) |
974 | 289 | return true; |
975 | 803 | if (803 EnableStackOVFSanitizer803 ) |
976 | 0 | return true; |
977 | 3.18k | } |
978 | 3.18k | |
979 | 3.18k | const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); |
980 | 3.18k | if (MFI.hasCalls() || 3.18k HMFI.hasClobberLR()2.45k ) |
981 | 731 | return true; |
982 | 2.45k | |
983 | 2.45k | return false; |
984 | 2.45k | } |
985 | | |
986 | | enum SpillKind { |
987 | | SK_ToMem, |
988 | | SK_FromMem, |
989 | | SK_FromMemTailcall |
990 | | }; |
991 | | |
992 | | static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType, |
993 | 13 | bool Stkchk = false) { |
994 | 13 | const char * V4SpillToMemoryFunctions[] = { |
995 | 13 | "__save_r16_through_r17", |
996 | 13 | "__save_r16_through_r19", |
997 | 13 | "__save_r16_through_r21", |
998 | 13 | "__save_r16_through_r23", |
999 | 13 | "__save_r16_through_r25", |
1000 | 13 | "__save_r16_through_r27" }; |
1001 | 13 | |
1002 | 13 | const char * V4SpillToMemoryStkchkFunctions[] = { |
1003 | 13 | "__save_r16_through_r17_stkchk", |
1004 | 13 | "__save_r16_through_r19_stkchk", |
1005 | 13 | "__save_r16_through_r21_stkchk", |
1006 | 13 | "__save_r16_through_r23_stkchk", |
1007 | 13 | "__save_r16_through_r25_stkchk", |
1008 | 13 | "__save_r16_through_r27_stkchk" }; |
1009 | 13 | |
1010 | 13 | const char * V4SpillFromMemoryFunctions[] = { |
1011 | 13 | "__restore_r16_through_r17_and_deallocframe", |
1012 | 13 | "__restore_r16_through_r19_and_deallocframe", |
1013 | 13 | "__restore_r16_through_r21_and_deallocframe", |
1014 | 13 | "__restore_r16_through_r23_and_deallocframe", |
1015 | 13 | "__restore_r16_through_r25_and_deallocframe", |
1016 | 13 | "__restore_r16_through_r27_and_deallocframe" }; |
1017 | 13 | |
1018 | 13 | const char * V4SpillFromMemoryTailcallFunctions[] = { |
1019 | 13 | "__restore_r16_through_r17_and_deallocframe_before_tailcall", |
1020 | 13 | "__restore_r16_through_r19_and_deallocframe_before_tailcall", |
1021 | 13 | "__restore_r16_through_r21_and_deallocframe_before_tailcall", |
1022 | 13 | "__restore_r16_through_r23_and_deallocframe_before_tailcall", |
1023 | 13 | "__restore_r16_through_r25_and_deallocframe_before_tailcall", |
1024 | 13 | "__restore_r16_through_r27_and_deallocframe_before_tailcall" |
1025 | 13 | }; |
1026 | 13 | |
1027 | 13 | const char **SpillFunc = nullptr; |
1028 | 13 | |
1029 | 13 | switch(SpillType) { |
1030 | 5 | case SK_ToMem: |
1031 | 1 | SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions |
1032 | 4 | : V4SpillToMemoryFunctions; |
1033 | 5 | break; |
1034 | 7 | case SK_FromMem: |
1035 | 7 | SpillFunc = V4SpillFromMemoryFunctions; |
1036 | 7 | break; |
1037 | 1 | case SK_FromMemTailcall: |
1038 | 1 | SpillFunc = V4SpillFromMemoryTailcallFunctions; |
1039 | 1 | break; |
1040 | 13 | } |
1041 | 13 | assert(SpillFunc && "Unknown spill kind"); |
1042 | 13 | |
1043 | 13 | // Spill all callee-saved registers up to the highest register used. |
1044 | 13 | switch (MaxReg) { |
1045 | 3 | case Hexagon::R17: |
1046 | 3 | return SpillFunc[0]; |
1047 | 4 | case Hexagon::R19: |
1048 | 4 | return SpillFunc[1]; |
1049 | 0 | case Hexagon::R21: |
1050 | 0 | return SpillFunc[2]; |
1051 | 0 | case Hexagon::R23: |
1052 | 0 | return SpillFunc[3]; |
1053 | 0 | case Hexagon::R25: |
1054 | 0 | return SpillFunc[4]; |
1055 | 6 | case Hexagon::R27: |
1056 | 6 | return SpillFunc[5]; |
1057 | 0 | default: |
1058 | 0 | llvm_unreachable("Unhandled maximum callee save register"); |
1059 | 0 | } |
1060 | 0 | return nullptr; |
1061 | 0 | } |
1062 | | |
1063 | | int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, |
1064 | 4.98k | int FI, unsigned &FrameReg) const { |
1065 | 4.98k | auto &MFI = MF.getFrameInfo(); |
1066 | 4.98k | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1067 | 4.98k | |
1068 | 4.98k | int Offset = MFI.getObjectOffset(FI); |
1069 | 4.98k | bool HasAlloca = MFI.hasVarSizedObjects(); |
1070 | 4.98k | bool HasExtraAlign = HRI.needsStackRealignment(MF); |
1071 | 4.98k | bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; |
1072 | 4.98k | |
1073 | 4.98k | auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); |
1074 | 4.98k | unsigned FrameSize = MFI.getStackSize(); |
1075 | 4.98k | unsigned SP = HRI.getStackRegister(); |
1076 | 4.98k | unsigned FP = HRI.getFrameRegister(); |
1077 | 4.98k | unsigned AP = HMFI.getStackAlignBasePhysReg(); |
1078 | 4.98k | // It may happen that AP will be absent even if HasAlloca && HasExtraAlign |
1079 | 4.98k | // is true. HasExtraAlign may be set because of vector spills, without |
1080 | 4.98k | // aligned locals or aligned outgoing function arguments. Since vector |
1081 | 4.98k | // spills will ultimately be "unaligned", it is safe to use FP as the |
1082 | 4.98k | // base register. |
1083 | 4.98k | // In fact, in such a scenario the stack is actually not required to be |
1084 | 4.98k | // aligned, although it may end up being aligned anyway, since this |
1085 | 4.98k | // particular case is not easily detectable. The alignment will be |
1086 | 4.98k | // unnecessary, but not incorrect. |
1087 | 4.98k | // Unfortunately there is no quick way to verify that the above is |
1088 | 4.98k | // indeed the case (and that it's not a result of an error), so just |
1089 | 4.98k | // assume that missing AP will be replaced by FP. |
1090 | 4.98k | // (A better fix would be to rematerialize AP from FP and always align |
1091 | 4.98k | // vector spills.) |
1092 | 4.98k | if (AP == 0) |
1093 | 4.98k | AP = FP; |
1094 | 4.98k | |
1095 | 4.98k | bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). |
1096 | 4.98k | // Use FP at -O0, except when there are objects with extra alignment. |
1097 | 4.98k | // That additional alignment requirement may cause a pad to be inserted, |
1098 | 4.98k | // which will make it impossible to use FP to access objects located |
1099 | 4.98k | // past the pad. |
1100 | 4.98k | if (NoOpt && 4.98k !HasExtraAlign3.19k ) |
1101 | 3.16k | UseFP = true; |
1102 | 4.98k | if (MFI.isFixedObjectIndex(FI) || 4.98k MFI.isObjectPreAllocated(FI)4.78k ) { |
1103 | 211 | // Fixed and preallocated objects will be located before any padding |
1104 | 211 | // so FP must be used to access them. |
1105 | 205 | UseFP |= (HasAlloca || HasExtraAlign); |
1106 | 4.98k | } else { |
1107 | 4.77k | if (HasAlloca4.77k ) { |
1108 | 3 | if (HasExtraAlign) |
1109 | 2 | UseAP = true; |
1110 | 3 | else |
1111 | 1 | UseFP = true; |
1112 | 3 | } |
1113 | 4.77k | } |
1114 | 4.98k | |
1115 | 4.98k | // If FP was picked, then there had better be FP. |
1116 | 4.98k | bool HasFP = hasFP(MF); |
1117 | 4.98k | assert((HasFP || !UseFP) && "This function must have frame pointer"); |
1118 | 4.98k | |
1119 | 4.98k | // Having FP implies allocframe. Allocframe will store extra 8 bytes: |
1120 | 4.98k | // FP/LR. If the base register is used to access an object across these |
1121 | 4.98k | // 8 bytes, then the offset will need to be adjusted by 8. |
1122 | 4.98k | // |
1123 | 4.98k | // After allocframe: |
1124 | 4.98k | // HexagonISelLowering adds 8 to ---+ |
1125 | 4.98k | // the offsets of all stack-based | |
1126 | 4.98k | // arguments (*) | |
1127 | 4.98k | // | |
1128 | 4.98k | // getObjectOffset < 0 0 8 getObjectOffset >= 8 |
1129 | 4.98k | // ------------------------+-----+------------------------> increasing |
1130 | 4.98k | // <local objects> |FP/LR| <input arguments> addresses |
1131 | 4.98k | // -----------------+------+-----+------------------------> |
1132 | 4.98k | // | | |
1133 | 4.98k | // SP/AP point --+ +-- FP points here (**) |
1134 | 4.98k | // somewhere on |
1135 | 4.98k | // this side of FP/LR |
1136 | 4.98k | // |
1137 | 4.98k | // (*) See LowerFormalArguments. The FP/LR is assumed to be present. |
1138 | 4.98k | // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. |
1139 | 4.98k | |
1140 | 4.98k | // The lowering assumes that FP/LR is present, and so the offsets of |
1141 | 4.98k | // the formal arguments start at 8. If FP/LR is not there we need to |
1142 | 4.98k | // reduce the offset by 8. |
1143 | 4.98k | if (Offset > 0 && 4.98k !HasFP8 ) |
1144 | 8 | Offset -= 8; |
1145 | 4.98k | |
1146 | 4.98k | if (UseFP) |
1147 | 3.18k | FrameReg = FP; |
1148 | 1.80k | else if (1.80k UseAP1.80k ) |
1149 | 2 | FrameReg = AP; |
1150 | 1.80k | else |
1151 | 1.80k | FrameReg = SP; |
1152 | 4.98k | |
1153 | 4.98k | // Calculate the actual offset in the instruction. If there is no FP |
1154 | 4.98k | // (in other words, no allocframe), then SP will not be adjusted (i.e. |
1155 | 4.98k | // there will be no SP -= FrameSize), so the frame size should not be |
1156 | 4.98k | // added to the calculated offset. |
1157 | 4.98k | int RealOffset = Offset; |
1158 | 4.98k | if (!UseFP && 4.98k !UseAP1.80k ) |
1159 | 1.80k | RealOffset = FrameSize+Offset; |
1160 | 4.98k | return RealOffset; |
1161 | 4.98k | } |
1162 | | |
1163 | | bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, |
1164 | | const CSIVect &CSI, const HexagonRegisterInfo &HRI, |
1165 | 2.40k | bool &PrologueStubs) const { |
1166 | 2.40k | if (CSI.empty()) |
1167 | 2.35k | return true; |
1168 | 54 | |
1169 | 54 | MachineBasicBlock::iterator MI = MBB.begin(); |
1170 | 54 | PrologueStubs = false; |
1171 | 54 | MachineFunction &MF = *MBB.getParent(); |
1172 | 54 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
1173 | 54 | auto &HII = *HST.getInstrInfo(); |
1174 | 54 | |
1175 | 54 | if (useSpillFunction(MF, CSI)54 ) { |
1176 | 5 | PrologueStubs = true; |
1177 | 5 | unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); |
1178 | 5 | bool StkOvrFlowEnabled = EnableStackOVFSanitizer; |
1179 | 5 | const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem, |
1180 | 5 | StkOvrFlowEnabled); |
1181 | 5 | auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); |
1182 | 5 | bool IsPIC = HTM.isPositionIndependent(); |
1183 | 5 | bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong; |
1184 | 5 | |
1185 | 5 | // Call spill function. |
1186 | 5 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()5 : DebugLoc()0 ; |
1187 | 5 | unsigned SpillOpc; |
1188 | 5 | if (StkOvrFlowEnabled5 ) { |
1189 | 1 | if (LongCalls) |
1190 | 0 | SpillOpc = IsPIC ? 0 Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC0 |
1191 | 0 | : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT; |
1192 | 1 | else |
1193 | 1 | SpillOpc = IsPIC ? 1 Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC0 |
1194 | 1 | : Hexagon::SAVE_REGISTERS_CALL_V4STK; |
1195 | 5 | } else { |
1196 | 4 | if (LongCalls) |
1197 | 0 | SpillOpc = IsPIC ? 0 Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC0 |
1198 | 0 | : Hexagon::SAVE_REGISTERS_CALL_V4_EXT; |
1199 | 4 | else |
1200 | 4 | SpillOpc = IsPIC ? 4 Hexagon::SAVE_REGISTERS_CALL_V4_PIC3 |
1201 | 1 | : Hexagon::SAVE_REGISTERS_CALL_V4; |
1202 | 4 | } |
1203 | 5 | |
1204 | 5 | MachineInstr *SaveRegsCall = |
1205 | 5 | BuildMI(MBB, MI, DL, HII.get(SpillOpc)) |
1206 | 5 | .addExternalSymbol(SpillFun); |
1207 | 5 | |
1208 | 5 | // Add callee-saved registers as use. |
1209 | 5 | addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true); |
1210 | 5 | // Add live in registers. |
1211 | 27 | for (unsigned I = 0; I < CSI.size()27 ; ++I22 ) |
1212 | 22 | MBB.addLiveIn(CSI[I].getReg()); |
1213 | 5 | return true; |
1214 | 5 | } |
1215 | 49 | |
1216 | 147 | for (unsigned i = 0, n = CSI.size(); 49 i < n147 ; ++i98 ) { |
1217 | 98 | unsigned Reg = CSI[i].getReg(); |
1218 | 98 | // Add live in registers. We treat eh_return callee saved registers r0 - r3 |
1219 | 98 | // specially. They are not really callee saved registers as they are not |
1220 | 98 | // supposed to be killed. |
1221 | 98 | bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); |
1222 | 98 | int FI = CSI[i].getFrameIdx(); |
1223 | 98 | const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); |
1224 | 98 | HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); |
1225 | 98 | if (IsKill) |
1226 | 96 | MBB.addLiveIn(Reg); |
1227 | 98 | } |
1228 | 2.40k | return true; |
1229 | 2.40k | } |
1230 | | |
1231 | | bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, |
1232 | 2.40k | const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { |
1233 | 2.40k | if (CSI.empty()) |
1234 | 2.35k | return false; |
1235 | 53 | |
1236 | 53 | MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); |
1237 | 53 | MachineFunction &MF = *MBB.getParent(); |
1238 | 53 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
1239 | 53 | auto &HII = *HST.getInstrInfo(); |
1240 | 53 | |
1241 | 53 | if (useRestoreFunction(MF, CSI)53 ) { |
1242 | 7 | bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); |
1243 | 8 | unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI); |
1244 | 8 | SpillKind Kind = HasTC ? SK_FromMemTailcall1 : SK_FromMem7 ; |
1245 | 8 | const char *RestoreFn = getSpillFunctionFor(MaxR, Kind); |
1246 | 8 | auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget()); |
1247 | 8 | bool IsPIC = HTM.isPositionIndependent(); |
1248 | 7 | bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong; |
1249 | 8 | |
1250 | 8 | // Call restore function. |
1251 | 8 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() |
1252 | 0 | : MBB.getLastNonDebugInstr()->getDebugLoc(); |
1253 | 8 | MachineInstr *DeallocCall = nullptr; |
1254 | 8 | |
1255 | 8 | if (HasTC8 ) { |
1256 | 1 | unsigned RetOpc; |
1257 | 1 | if (LongCalls) |
1258 | 0 | RetOpc = IsPIC ? 0 Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC0 |
1259 | 0 | : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT; |
1260 | 1 | else |
1261 | 1 | RetOpc = IsPIC ? 1 Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC0 |
1262 | 1 | : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; |
1263 | 1 | DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc)) |
1264 | 1 | .addExternalSymbol(RestoreFn); |
1265 | 8 | } else { |
1266 | 7 | // The block has a return. |
1267 | 7 | MachineBasicBlock::iterator It = MBB.getFirstTerminator(); |
1268 | 7 | assert(It->isReturn() && std::next(It) == MBB.end()); |
1269 | 7 | unsigned RetOpc; |
1270 | 7 | if (LongCalls) |
1271 | 2 | RetOpc = IsPIC ? 2 Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC0 |
1272 | 2 | : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT; |
1273 | 7 | else |
1274 | 5 | RetOpc = IsPIC ? 5 Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC3 |
1275 | 2 | : Hexagon::RESTORE_DEALLOC_RET_JMP_V4; |
1276 | 7 | DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc)) |
1277 | 7 | .addExternalSymbol(RestoreFn); |
1278 | 7 | // Transfer the function live-out registers. |
1279 | 7 | DeallocCall->copyImplicitOps(MF, *It); |
1280 | 7 | } |
1281 | 8 | addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false); |
1282 | 8 | return true; |
1283 | 8 | } |
1284 | 45 | |
1285 | 145 | for (unsigned i = 0; 45 i < CSI.size()145 ; ++i100 ) { |
1286 | 100 | unsigned Reg = CSI[i].getReg(); |
1287 | 100 | const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); |
1288 | 100 | int FI = CSI[i].getFrameIdx(); |
1289 | 100 | HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); |
1290 | 100 | } |
1291 | 2.40k | |
1292 | 2.40k | return true; |
1293 | 2.40k | } |
1294 | | |
1295 | | MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr( |
1296 | | MachineFunction &MF, MachineBasicBlock &MBB, |
1297 | 410 | MachineBasicBlock::iterator I) const { |
1298 | 410 | MachineInstr &MI = *I; |
1299 | 410 | unsigned Opc = MI.getOpcode(); |
1300 | 410 | (void)Opc; // Silence compiler warning. |
1301 | 410 | assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) && |
1302 | 410 | "Cannot handle this call frame pseudo instruction"); |
1303 | 410 | return MBB.erase(I); |
1304 | 410 | } |
1305 | | |
1306 | | void HexagonFrameLowering::processFunctionBeforeFrameFinalized( |
1307 | 2.40k | MachineFunction &MF, RegScavenger *RS) const { |
1308 | 2.40k | // If this function uses an aligned stack and also has variable sized stack |
1309 | 2.40k | // objects, then we need to map all spill slots to fixed positions, so that |
1310 | 2.40k | // they can be accessed through FP. Otherwise they would have to be accessed |
1311 | 2.40k | // via AP, which may not be available at the particular place in the program. |
1312 | 2.40k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1313 | 2.40k | bool HasAlloca = MFI.hasVarSizedObjects(); |
1314 | 2.40k | bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment()); |
1315 | 2.40k | |
1316 | 2.40k | if (!HasAlloca || 2.40k !NeedsAlign3 ) |
1317 | 2.40k | return; |
1318 | 2 | |
1319 | 2 | unsigned LFS = MFI.getLocalFrameSize(); |
1320 | 11 | for (int i = 0, e = MFI.getObjectIndexEnd(); i != e11 ; ++i9 ) { |
1321 | 9 | if (!MFI.isSpillSlotObjectIndex(i) || 9 MFI.isDeadObjectIndex(i)5 ) |
1322 | 4 | continue; |
1323 | 5 | unsigned S = MFI.getObjectSize(i); |
1324 | 5 | // Reduce the alignment to at most 8. This will require unaligned vector |
1325 | 5 | // stores if they happen here. |
1326 | 5 | unsigned A = std::max(MFI.getObjectAlignment(i), 8U); |
1327 | 5 | MFI.setObjectAlignment(i, 8); |
1328 | 5 | LFS = alignTo(LFS+S, A); |
1329 | 5 | MFI.mapLocalFrameObject(i, -LFS); |
1330 | 5 | } |
1331 | 2 | |
1332 | 2 | MFI.setLocalFrameSize(LFS); |
1333 | 2 | unsigned A = MFI.getLocalFrameMaxAlign(); |
1334 | 2 | assert(A <= 8 && "Unexpected local frame alignment"); |
1335 | 2 | if (A == 0) |
1336 | 2 | MFI.setLocalFrameMaxAlign(8); |
1337 | 2 | MFI.setUseLocalStackAllocationBlock(true); |
1338 | 2 | |
1339 | 2 | // Set the physical aligned-stack base address register. |
1340 | 2 | unsigned AP = 0; |
1341 | 2 | if (const MachineInstr *AI = getAlignaInstr(MF)) |
1342 | 1 | AP = AI->getOperand(0).getReg(); |
1343 | 2.40k | auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); |
1344 | 2.40k | HMFI.setStackAlignBasePhysReg(AP); |
1345 | 2.40k | } |
1346 | | |
1347 | | /// Returns true if there are no caller-saved registers available in class RC. |
1348 | | static bool needToReserveScavengingSpillSlots(MachineFunction &MF, |
1349 | 15 | const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) { |
1350 | 15 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1351 | 15 | |
1352 | 273 | auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool { |
1353 | 281 | for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid()281 ; ++AI8 ) |
1354 | 278 | if (278 MRI.isPhysRegUsed(*AI)278 ) |
1355 | 270 | return true; |
1356 | 3 | return false; |
1357 | 273 | }; |
1358 | 15 | |
1359 | 15 | // Check for an unused caller-saved register. Callee-saved registers |
1360 | 15 | // have become pristine by now. |
1361 | 285 | for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P285 ; ++P270 ) |
1362 | 273 | if (273 !IsUsed(*P)273 ) |
1363 | 3 | return false; |
1364 | 15 | |
1365 | 15 | // All caller-saved registers are used. |
1366 | 12 | return true; |
1367 | 15 | } |
1368 | | |
1369 | | #ifndef NDEBUG |
1370 | | static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { |
1371 | | dbgs() << '{'; |
1372 | | for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { |
1373 | | unsigned R = x; |
1374 | | dbgs() << ' ' << PrintReg(R, &TRI); |
1375 | | } |
1376 | | dbgs() << " }"; |
1377 | | } |
1378 | | #endif |
1379 | | |
1380 | | bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, |
1381 | 2.40k | const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { |
1382 | 2.40k | DEBUG(dbgs() << __func__ << " on " |
1383 | 2.40k | << MF.getFunction()->getName() << '\n'); |
1384 | 2.40k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1385 | 2.40k | BitVector SRegs(Hexagon::NUM_TARGET_REGS); |
1386 | 2.40k | |
1387 | 2.40k | // Generate a set of unique, callee-saved registers (SRegs), where each |
1388 | 2.40k | // register in the set is maximal in terms of sub-/super-register relation, |
1389 | 2.40k | // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. |
1390 | 2.40k | |
1391 | 2.40k | // (1) For each callee-saved register, add that register and all of its |
1392 | 2.40k | // sub-registers to SRegs. |
1393 | 2.40k | DEBUG(dbgs() << "Initial CS registers: {"); |
1394 | 2.62k | for (unsigned i = 0, n = CSI.size(); i < n2.62k ; ++i212 ) { |
1395 | 212 | unsigned R = CSI[i].getReg(); |
1396 | 212 | DEBUG(dbgs() << ' ' << PrintReg(R, TRI)); |
1397 | 424 | for (MCSubRegIterator SR(R, TRI, true); SR.isValid()424 ; ++SR212 ) |
1398 | 212 | SRegs[*SR] = true; |
1399 | 212 | } |
1400 | 2.40k | DEBUG(dbgs() << " }\n"); |
1401 | 2.40k | DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); |
1402 | 2.40k | |
1403 | 2.40k | // (2) For each reserved register, remove that register and all of its |
1404 | 2.40k | // sub- and super-registers from SRegs. |
1405 | 2.40k | BitVector Reserved = TRI->getReservedRegs(MF); |
1406 | 89.0k | for (int x = Reserved.find_first(); x >= 089.0k ; x = Reserved.find_next(x)86.6k ) { |
1407 | 86.6k | unsigned R = x; |
1408 | 228k | for (MCSuperRegIterator SR(R, TRI, true); SR.isValid()228k ; ++SR142k ) |
1409 | 142k | SRegs[*SR] = false; |
1410 | 86.6k | } |
1411 | 2.40k | DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); |
1412 | 2.40k | DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); |
1413 | 2.40k | |
1414 | 2.40k | // (3) Collect all registers that have at least one sub-register in SRegs, |
1415 | 2.40k | // and also have no sub-registers that are reserved. These will be the can- |
1416 | 2.40k | // didates for saving as a whole instead of their individual sub-registers. |
1417 | 2.40k | // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) |
1418 | 2.40k | BitVector TmpSup(Hexagon::NUM_TARGET_REGS); |
1419 | 2.62k | for (int x = SRegs.find_first(); x >= 02.62k ; x = SRegs.find_next(x)212 ) { |
1420 | 212 | unsigned R = x; |
1421 | 424 | for (MCSuperRegIterator SR(R, TRI); SR.isValid()424 ; ++SR212 ) |
1422 | 212 | TmpSup[*SR] = true; |
1423 | 212 | } |
1424 | 2.52k | for (int x = TmpSup.find_first(); x >= 02.52k ; x = TmpSup.find_next(x)120 ) { |
1425 | 120 | unsigned R = x; |
1426 | 480 | for (MCSubRegIterator SR(R, TRI, true); SR.isValid()480 ; ++SR360 ) { |
1427 | 360 | if (!Reserved[*SR]) |
1428 | 360 | continue; |
1429 | 0 | TmpSup[R] = false; |
1430 | 0 | break; |
1431 | 0 | } |
1432 | 120 | } |
1433 | 2.40k | DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); |
1434 | 2.40k | |
1435 | 2.40k | // (4) Include all super-registers found in (3) into SRegs. |
1436 | 2.40k | SRegs |= TmpSup; |
1437 | 2.40k | DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); |
1438 | 2.40k | |
1439 | 2.40k | // (5) For each register R in SRegs, if any super-register of R is in SRegs, |
1440 | 2.40k | // remove R from SRegs. |
1441 | 2.74k | for (int x = SRegs.find_first(); x >= 02.74k ; x = SRegs.find_next(x)332 ) { |
1442 | 332 | unsigned R = x; |
1443 | 332 | for (MCSuperRegIterator SR(R, TRI); SR.isValid()332 ; ++SR0 ) { |
1444 | 212 | if (!SRegs[*SR]) |
1445 | 0 | continue; |
1446 | 212 | SRegs[R] = false; |
1447 | 212 | break; |
1448 | 212 | } |
1449 | 332 | } |
1450 | 2.40k | DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); |
1451 | 2.40k | |
1452 | 2.40k | // Now, for each register that has a fixed stack slot, create the stack |
1453 | 2.40k | // object for it. |
1454 | 2.40k | CSI.clear(); |
1455 | 2.40k | |
1456 | 2.40k | using SpillSlot = TargetFrameLowering::SpillSlot; |
1457 | 2.40k | |
1458 | 2.40k | unsigned NumFixed; |
1459 | 2.40k | int MinOffset = 0; // CS offsets are negative. |
1460 | 2.40k | const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); |
1461 | 45.7k | for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed45.7k ; ++S43.3k ) { |
1462 | 43.3k | if (!SRegs[S->Reg]) |
1463 | 43.2k | continue; |
1464 | 118 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); |
1465 | 118 | int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset); |
1466 | 118 | MinOffset = std::min(MinOffset, S->Offset); |
1467 | 118 | CSI.push_back(CalleeSavedInfo(S->Reg, FI)); |
1468 | 118 | SRegs[S->Reg] = false; |
1469 | 118 | } |
1470 | 2.40k | |
1471 | 2.40k | // There can be some registers that don't have fixed slots. For example, |
1472 | 2.40k | // we need to store R0-R3 in functions with exception handling. For each |
1473 | 2.40k | // such register, create a non-fixed stack object. |
1474 | 2.41k | for (int x = SRegs.find_first(); x >= 02.41k ; x = SRegs.find_next(x)2 ) { |
1475 | 2 | unsigned R = x; |
1476 | 2 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); |
1477 | 2 | unsigned Size = TRI->getSpillSize(*RC); |
1478 | 2 | int Off = MinOffset - Size; |
1479 | 2 | unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment()); |
1480 | 2 | assert(isPowerOf2_32(Align)); |
1481 | 2 | Off &= -Align; |
1482 | 2 | int FI = MFI.CreateFixedSpillStackObject(Size, Off); |
1483 | 2 | MinOffset = std::min(MinOffset, Off); |
1484 | 2 | CSI.push_back(CalleeSavedInfo(R, FI)); |
1485 | 2 | SRegs[R] = false; |
1486 | 2 | } |
1487 | 2.40k | |
1488 | 2.40k | DEBUG({ |
1489 | 2.40k | dbgs() << "CS information: {"; |
1490 | 2.40k | for (unsigned i = 0, n = CSI.size(); i < n; ++i) { |
1491 | 2.40k | int FI = CSI[i].getFrameIdx(); |
1492 | 2.40k | int Off = MFI.getObjectOffset(FI); |
1493 | 2.40k | dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; |
1494 | 2.40k | if (Off >= 0) |
1495 | 2.40k | dbgs() << '+'; |
1496 | 2.40k | dbgs() << Off; |
1497 | 2.40k | } |
1498 | 2.40k | dbgs() << " }\n"; |
1499 | 2.40k | }); |
1500 | 2.40k | |
1501 | | #ifndef NDEBUG |
1502 | | // Verify that all registers were handled. |
1503 | | bool MissedReg = false; |
1504 | | for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { |
1505 | | unsigned R = x; |
1506 | | dbgs() << PrintReg(R, TRI) << ' '; |
1507 | | MissedReg = true; |
1508 | | } |
1509 | | if (MissedReg) |
1510 | | llvm_unreachable("...there are unhandled callee-saved registers!"); |
1511 | | #endif |
1512 | | |
1513 | 2.40k | return true; |
1514 | 2.40k | } |
1515 | | |
1516 | | bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, |
1517 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1518 | 3.55k | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1519 | 3.55k | MachineInstr *MI = &*It; |
1520 | 3.55k | DebugLoc DL = MI->getDebugLoc(); |
1521 | 3.55k | unsigned DstR = MI->getOperand(0).getReg(); |
1522 | 3.55k | unsigned SrcR = MI->getOperand(1).getReg(); |
1523 | 3.55k | if (!Hexagon::ModRegsRegClass.contains(DstR) || |
1524 | 16 | !Hexagon::ModRegsRegClass.contains(SrcR)) |
1525 | 3.55k | return false; |
1526 | 0 |
|
1527 | 0 | unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
1528 | 0 | BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1)); |
1529 | 0 | BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR) |
1530 | 0 | .addReg(TmpR, RegState::Kill); |
1531 | 0 |
|
1532 | 0 | NewRegs.push_back(TmpR); |
1533 | 0 | B.erase(It); |
1534 | 0 | return true; |
1535 | 0 | } |
1536 | | |
1537 | | bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, |
1538 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1539 | 17 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1540 | 17 | MachineInstr *MI = &*It; |
1541 | 17 | if (!MI->getOperand(0).isFI()) |
1542 | 0 | return false; |
1543 | 17 | |
1544 | 17 | DebugLoc DL = MI->getDebugLoc(); |
1545 | 17 | unsigned Opc = MI->getOpcode(); |
1546 | 17 | unsigned SrcR = MI->getOperand(2).getReg(); |
1547 | 17 | bool IsKill = MI->getOperand(2).isKill(); |
1548 | 17 | int FI = MI->getOperand(0).getIndex(); |
1549 | 17 | |
1550 | 17 | // TmpR = C2_tfrpr SrcR if SrcR is a predicate register |
1551 | 17 | // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register |
1552 | 17 | unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
1553 | 17 | unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr |
1554 | 0 | : Hexagon::A2_tfrcrr; |
1555 | 17 | BuildMI(B, It, DL, HII.get(TfrOpc), TmpR) |
1556 | 17 | .addReg(SrcR, getKillRegState(IsKill)); |
1557 | 17 | |
1558 | 17 | // S2_storeri_io FI, 0, TmpR |
1559 | 17 | BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io)) |
1560 | 17 | .addFrameIndex(FI) |
1561 | 17 | .addImm(0) |
1562 | 17 | .addReg(TmpR, RegState::Kill) |
1563 | 17 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1564 | 17 | |
1565 | 17 | NewRegs.push_back(TmpR); |
1566 | 17 | B.erase(It); |
1567 | 17 | return true; |
1568 | 17 | } |
1569 | | |
1570 | | bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, |
1571 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1572 | 17 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1573 | 17 | MachineInstr *MI = &*It; |
1574 | 17 | if (!MI->getOperand(1).isFI()) |
1575 | 0 | return false; |
1576 | 17 | |
1577 | 17 | DebugLoc DL = MI->getDebugLoc(); |
1578 | 17 | unsigned Opc = MI->getOpcode(); |
1579 | 17 | unsigned DstR = MI->getOperand(0).getReg(); |
1580 | 17 | int FI = MI->getOperand(1).getIndex(); |
1581 | 17 | |
1582 | 17 | // TmpR = L2_loadri_io FI, 0 |
1583 | 17 | unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
1584 | 17 | BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR) |
1585 | 17 | .addFrameIndex(FI) |
1586 | 17 | .addImm(0) |
1587 | 17 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1588 | 17 | |
1589 | 17 | // DstR = C2_tfrrp TmpR if DstR is a predicate register |
1590 | 17 | // DstR = A2_tfrrcr TmpR if DstR is a modifier register |
1591 | 17 | unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp |
1592 | 0 | : Hexagon::A2_tfrrcr; |
1593 | 17 | BuildMI(B, It, DL, HII.get(TfrOpc), DstR) |
1594 | 17 | .addReg(TmpR, RegState::Kill); |
1595 | 17 | |
1596 | 17 | NewRegs.push_back(TmpR); |
1597 | 17 | B.erase(It); |
1598 | 17 | return true; |
1599 | 17 | } |
1600 | | |
1601 | | bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, |
1602 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1603 | 6 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1604 | 6 | MachineInstr *MI = &*It; |
1605 | 6 | if (!MI->getOperand(0).isFI()) |
1606 | 0 | return false; |
1607 | 6 | |
1608 | 6 | DebugLoc DL = MI->getDebugLoc(); |
1609 | 6 | unsigned SrcR = MI->getOperand(2).getReg(); |
1610 | 6 | bool IsKill = MI->getOperand(2).isKill(); |
1611 | 6 | int FI = MI->getOperand(0).getIndex(); |
1612 | 6 | auto *RC = &Hexagon::HvxVRRegClass; |
1613 | 6 | |
1614 | 6 | // Insert transfer to general vector register. |
1615 | 6 | // TmpR0 = A2_tfrsi 0x01010101 |
1616 | 6 | // TmpR1 = V6_vandqrt Qx, TmpR0 |
1617 | 6 | // store FI, 0, TmpR1 |
1618 | 6 | unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
1619 | 6 | unsigned TmpR1 = MRI.createVirtualRegister(RC); |
1620 | 6 | |
1621 | 6 | BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) |
1622 | 6 | .addImm(0x01010101); |
1623 | 6 | |
1624 | 6 | BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1) |
1625 | 6 | .addReg(SrcR, getKillRegState(IsKill)) |
1626 | 6 | .addReg(TmpR0, RegState::Kill); |
1627 | 6 | |
1628 | 6 | auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1629 | 6 | HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI); |
1630 | 6 | expandStoreVec(B, std::prev(It), MRI, HII, NewRegs); |
1631 | 6 | |
1632 | 6 | NewRegs.push_back(TmpR0); |
1633 | 6 | NewRegs.push_back(TmpR1); |
1634 | 6 | B.erase(It); |
1635 | 6 | return true; |
1636 | 6 | } |
1637 | | |
1638 | | bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, |
1639 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1640 | 9 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1641 | 9 | MachineInstr *MI = &*It; |
1642 | 9 | if (!MI->getOperand(1).isFI()) |
1643 | 0 | return false; |
1644 | 9 | |
1645 | 9 | DebugLoc DL = MI->getDebugLoc(); |
1646 | 9 | unsigned DstR = MI->getOperand(0).getReg(); |
1647 | 9 | int FI = MI->getOperand(1).getIndex(); |
1648 | 9 | auto *RC = &Hexagon::HvxVRRegClass; |
1649 | 9 | |
1650 | 9 | // TmpR0 = A2_tfrsi 0x01010101 |
1651 | 9 | // TmpR1 = load FI, 0 |
1652 | 9 | // DstR = V6_vandvrt TmpR1, TmpR0 |
1653 | 9 | unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); |
1654 | 9 | unsigned TmpR1 = MRI.createVirtualRegister(RC); |
1655 | 9 | |
1656 | 9 | BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) |
1657 | 9 | .addImm(0x01010101); |
1658 | 9 | MachineFunction &MF = *B.getParent(); |
1659 | 9 | auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1660 | 9 | HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI); |
1661 | 9 | expandLoadVec(B, std::prev(It), MRI, HII, NewRegs); |
1662 | 9 | |
1663 | 9 | BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR) |
1664 | 9 | .addReg(TmpR1, RegState::Kill) |
1665 | 9 | .addReg(TmpR0, RegState::Kill); |
1666 | 9 | |
1667 | 9 | NewRegs.push_back(TmpR0); |
1668 | 9 | NewRegs.push_back(TmpR1); |
1669 | 9 | B.erase(It); |
1670 | 9 | return true; |
1671 | 9 | } |
1672 | | |
1673 | | bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, |
1674 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1675 | 89 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1676 | 89 | MachineFunction &MF = *B.getParent(); |
1677 | 89 | auto &MFI = MF.getFrameInfo(); |
1678 | 89 | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1679 | 89 | MachineInstr *MI = &*It; |
1680 | 89 | if (!MI->getOperand(0).isFI()) |
1681 | 86 | return false; |
1682 | 3 | |
1683 | 3 | // It is possible that the double vector being stored is only partially |
1684 | 3 | // defined. From the point of view of the liveness tracking, it is ok to |
1685 | 3 | // store it as a whole, but if we break it up we may end up storing a |
1686 | 3 | // register that is entirely undefined. |
1687 | 3 | LivePhysRegs LPR(HRI); |
1688 | 3 | LPR.addLiveIns(B); |
1689 | 3 | SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers; |
1690 | 22 | for (auto R = B.begin(); R != It22 ; ++R19 ) { |
1691 | 19 | Clobbers.clear(); |
1692 | 19 | LPR.stepForward(*R, Clobbers); |
1693 | 19 | // Dead defs are recorded in Clobbers, but are not automatically removed |
1694 | 19 | // from the live set. |
1695 | 19 | for (auto &C : Clobbers) |
1696 | 22 | if (22 C.second->isReg() && 22 C.second->isDead()22 ) |
1697 | 4 | LPR.removeReg(C.first); |
1698 | 19 | } |
1699 | 3 | |
1700 | 3 | DebugLoc DL = MI->getDebugLoc(); |
1701 | 3 | unsigned SrcR = MI->getOperand(2).getReg(); |
1702 | 3 | unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); |
1703 | 3 | unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); |
1704 | 3 | bool IsKill = MI->getOperand(2).isKill(); |
1705 | 3 | int FI = MI->getOperand(0).getIndex(); |
1706 | 3 | |
1707 | 3 | unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); |
1708 | 3 | unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); |
1709 | 3 | unsigned HasAlign = MFI.getObjectAlignment(FI); |
1710 | 3 | unsigned StoreOpc; |
1711 | 3 | |
1712 | 3 | // Store low part. |
1713 | 3 | if (LPR.contains(SrcLo)3 ) { |
1714 | 3 | StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai |
1715 | 0 | : Hexagon::V6_vS32Ub_ai; |
1716 | 3 | BuildMI(B, It, DL, HII.get(StoreOpc)) |
1717 | 3 | .addFrameIndex(FI) |
1718 | 3 | .addImm(0) |
1719 | 3 | .addReg(SrcLo, getKillRegState(IsKill)) |
1720 | 3 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1721 | 3 | } |
1722 | 3 | |
1723 | 3 | // Store high part. |
1724 | 3 | if (LPR.contains(SrcHi)3 ) { |
1725 | 2 | StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai |
1726 | 0 | : Hexagon::V6_vS32Ub_ai; |
1727 | 2 | BuildMI(B, It, DL, HII.get(StoreOpc)) |
1728 | 2 | .addFrameIndex(FI) |
1729 | 2 | .addImm(Size) |
1730 | 2 | .addReg(SrcHi, getKillRegState(IsKill)) |
1731 | 2 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1732 | 2 | } |
1733 | 89 | |
1734 | 89 | B.erase(It); |
1735 | 89 | return true; |
1736 | 89 | } |
1737 | | |
1738 | | bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, |
1739 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1740 | 94 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1741 | 94 | MachineFunction &MF = *B.getParent(); |
1742 | 94 | auto &MFI = MF.getFrameInfo(); |
1743 | 94 | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1744 | 94 | MachineInstr *MI = &*It; |
1745 | 94 | if (!MI->getOperand(1).isFI()) |
1746 | 92 | return false; |
1747 | 2 | |
1748 | 2 | DebugLoc DL = MI->getDebugLoc(); |
1749 | 2 | unsigned DstR = MI->getOperand(0).getReg(); |
1750 | 2 | unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); |
1751 | 2 | unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); |
1752 | 2 | int FI = MI->getOperand(1).getIndex(); |
1753 | 2 | |
1754 | 2 | unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); |
1755 | 2 | unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); |
1756 | 2 | unsigned HasAlign = MFI.getObjectAlignment(FI); |
1757 | 2 | unsigned LoadOpc; |
1758 | 2 | |
1759 | 2 | // Load low part. |
1760 | 2 | LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai |
1761 | 0 | : Hexagon::V6_vL32Ub_ai; |
1762 | 2 | BuildMI(B, It, DL, HII.get(LoadOpc), DstLo) |
1763 | 2 | .addFrameIndex(FI) |
1764 | 2 | .addImm(0) |
1765 | 2 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1766 | 2 | |
1767 | 2 | // Load high part. |
1768 | 2 | LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai |
1769 | 0 | : Hexagon::V6_vL32Ub_ai; |
1770 | 94 | BuildMI(B, It, DL, HII.get(LoadOpc), DstHi) |
1771 | 94 | .addFrameIndex(FI) |
1772 | 94 | .addImm(Size) |
1773 | 94 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1774 | 94 | |
1775 | 94 | B.erase(It); |
1776 | 94 | return true; |
1777 | 94 | } |
1778 | | |
1779 | | bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, |
1780 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1781 | 6 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1782 | 6 | MachineFunction &MF = *B.getParent(); |
1783 | 6 | auto &MFI = MF.getFrameInfo(); |
1784 | 6 | MachineInstr *MI = &*It; |
1785 | 6 | if (!MI->getOperand(0).isFI()) |
1786 | 0 | return false; |
1787 | 6 | |
1788 | 6 | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1789 | 6 | DebugLoc DL = MI->getDebugLoc(); |
1790 | 6 | unsigned SrcR = MI->getOperand(2).getReg(); |
1791 | 6 | bool IsKill = MI->getOperand(2).isKill(); |
1792 | 6 | int FI = MI->getOperand(0).getIndex(); |
1793 | 6 | |
1794 | 6 | unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); |
1795 | 6 | unsigned HasAlign = MFI.getObjectAlignment(FI); |
1796 | 6 | unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai |
1797 | 0 | : Hexagon::V6_vS32Ub_ai; |
1798 | 6 | BuildMI(B, It, DL, HII.get(StoreOpc)) |
1799 | 6 | .addFrameIndex(FI) |
1800 | 6 | .addImm(0) |
1801 | 6 | .addReg(SrcR, getKillRegState(IsKill)) |
1802 | 6 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1803 | 6 | |
1804 | 6 | B.erase(It); |
1805 | 6 | return true; |
1806 | 6 | } |
1807 | | |
1808 | | bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, |
1809 | | MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, |
1810 | 9 | const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { |
1811 | 9 | MachineFunction &MF = *B.getParent(); |
1812 | 9 | auto &MFI = MF.getFrameInfo(); |
1813 | 9 | MachineInstr *MI = &*It; |
1814 | 9 | if (!MI->getOperand(1).isFI()) |
1815 | 0 | return false; |
1816 | 9 | |
1817 | 9 | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1818 | 9 | DebugLoc DL = MI->getDebugLoc(); |
1819 | 9 | unsigned DstR = MI->getOperand(0).getReg(); |
1820 | 9 | int FI = MI->getOperand(1).getIndex(); |
1821 | 9 | |
1822 | 9 | unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); |
1823 | 9 | unsigned HasAlign = MFI.getObjectAlignment(FI); |
1824 | 9 | unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai |
1825 | 0 | : Hexagon::V6_vL32Ub_ai; |
1826 | 9 | BuildMI(B, It, DL, HII.get(LoadOpc), DstR) |
1827 | 9 | .addFrameIndex(FI) |
1828 | 9 | .addImm(0) |
1829 | 9 | .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
1830 | 9 | |
1831 | 9 | B.erase(It); |
1832 | 9 | return true; |
1833 | 9 | } |
1834 | | |
1835 | | bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, |
1836 | 2.40k | SmallVectorImpl<unsigned> &NewRegs) const { |
1837 | 2.40k | auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); |
1838 | 2.40k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1839 | 2.40k | bool Changed = false; |
1840 | 2.40k | |
1841 | 3.61k | for (auto &B : MF) { |
1842 | 3.61k | // Traverse the basic block. |
1843 | 3.61k | MachineBasicBlock::iterator NextI; |
1844 | 24.6k | for (auto I = B.begin(), E = B.end(); I != E24.6k ; I = NextI20.9k ) { |
1845 | 20.9k | MachineInstr *MI = &*I; |
1846 | 20.9k | NextI = std::next(I); |
1847 | 20.9k | unsigned Opc = MI->getOpcode(); |
1848 | 20.9k | |
1849 | 20.9k | switch (Opc) { |
1850 | 3.55k | case TargetOpcode::COPY: |
1851 | 3.55k | Changed |= expandCopy(B, I, MRI, HII, NewRegs); |
1852 | 3.55k | break; |
1853 | 17 | case Hexagon::STriw_pred: |
1854 | 17 | case Hexagon::STriw_mod: |
1855 | 17 | Changed |= expandStoreInt(B, I, MRI, HII, NewRegs); |
1856 | 17 | break; |
1857 | 17 | case Hexagon::LDriw_pred: |
1858 | 17 | case Hexagon::LDriw_mod: |
1859 | 17 | Changed |= expandLoadInt(B, I, MRI, HII, NewRegs); |
1860 | 17 | break; |
1861 | 6 | case Hexagon::PS_vstorerq_ai: |
1862 | 6 | Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs); |
1863 | 6 | break; |
1864 | 9 | case Hexagon::PS_vloadrq_ai: |
1865 | 9 | Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs); |
1866 | 9 | break; |
1867 | 94 | case Hexagon::PS_vloadrw_ai: |
1868 | 94 | case Hexagon::PS_vloadrwu_ai: |
1869 | 94 | Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs); |
1870 | 94 | break; |
1871 | 89 | case Hexagon::PS_vstorerw_ai: |
1872 | 89 | case Hexagon::PS_vstorerwu_ai: |
1873 | 89 | Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs); |
1874 | 89 | break; |
1875 | 20.9k | } |
1876 | 20.9k | } |
1877 | 3.61k | } |
1878 | 2.40k | |
1879 | 2.40k | return Changed; |
1880 | 2.40k | } |
1881 | | |
1882 | | void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, |
1883 | | BitVector &SavedRegs, |
1884 | 2.40k | RegScavenger *RS) const { |
1885 | 2.40k | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1886 | 2.40k | |
1887 | 2.40k | SavedRegs.resize(HRI.getNumRegs()); |
1888 | 2.40k | |
1889 | 2.40k | // If we have a function containing __builtin_eh_return we want to spill and |
1890 | 2.40k | // restore all callee saved registers. Pretend that they are used. |
1891 | 2.40k | if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) |
1892 | 17 | for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); 1 *R17 ; ++R16 ) |
1893 | 16 | SavedRegs.set(*R); |
1894 | 2.40k | |
1895 | 2.40k | // Replace predicate register pseudo spill code. |
1896 | 2.40k | SmallVector<unsigned,8> NewRegs; |
1897 | 2.40k | expandSpillMacros(MF, NewRegs); |
1898 | 2.40k | if (OptimizeSpillSlots && 2.40k !isOptNone(MF)2.40k ) |
1899 | 860 | optimizeSpillSlots(MF, NewRegs); |
1900 | 2.40k | |
1901 | 2.40k | // We need to reserve a a spill slot if scavenging could potentially require |
1902 | 2.40k | // spilling a scavenged register. |
1903 | 2.40k | if (!NewRegs.empty() || 2.40k mayOverflowFrameOffset(MF)2.39k ) { |
1904 | 12 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1905 | 12 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1906 | 12 | SetVector<const TargetRegisterClass*> SpillRCs; |
1907 | 12 | // Reserve an int register in any case, because it could be used to hold |
1908 | 12 | // the stack offset in case it does not fit into a spill instruction. |
1909 | 12 | SpillRCs.insert(&Hexagon::IntRegsRegClass); |
1910 | 12 | |
1911 | 12 | for (unsigned VR : NewRegs) |
1912 | 64 | SpillRCs.insert(MRI.getRegClass(VR)); |
1913 | 12 | |
1914 | 15 | for (auto *RC : SpillRCs) { |
1915 | 15 | if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) |
1916 | 3 | continue; |
1917 | 12 | unsigned Num = RC == &Hexagon::IntRegsRegClass ? 12 NumberScavengerSlots10 : 12 ; |
1918 | 12 | unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC); |
1919 | 34 | for (unsigned i = 0; i < Num34 ; i++22 ) { |
1920 | 22 | int NewFI = MFI.CreateSpillStackObject(S, A); |
1921 | 22 | RS->addScavengingFrameIndex(NewFI); |
1922 | 22 | } |
1923 | 15 | } |
1924 | 12 | } |
1925 | 2.40k | |
1926 | 2.40k | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1927 | 2.40k | } |
1928 | | |
1929 | | unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF, |
1930 | | HexagonBlockRanges::IndexRange &FIR, |
1931 | | HexagonBlockRanges::InstrIndexMap &IndexMap, |
1932 | | HexagonBlockRanges::RegToRangeMap &DeadMap, |
1933 | 84 | const TargetRegisterClass *RC) const { |
1934 | 84 | auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); |
1935 | 84 | auto &MRI = MF.getRegInfo(); |
1936 | 84 | |
1937 | 2.26k | auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool { |
1938 | 2.26k | auto F = DeadMap.find({Reg,0}); |
1939 | 2.26k | if (F == DeadMap.end()) |
1940 | 205 | return false; |
1941 | 2.06k | for (auto &DR : F->second) |
1942 | 13.1k | if (13.1k DR.contains(FIR)13.1k ) |
1943 | 19 | return true; |
1944 | 2.04k | return false; |
1945 | 2.04k | }; |
1946 | 84 | |
1947 | 2.26k | for (unsigned Reg : RC->getRawAllocationOrder(MF)) { |
1948 | 2.26k | bool Dead = true; |
1949 | 2.26k | for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) { |
1950 | 2.26k | if (isDead(R.Reg)) |
1951 | 19 | continue; |
1952 | 2.24k | Dead = false; |
1953 | 2.24k | break; |
1954 | 2.24k | } |
1955 | 2.26k | if (Dead) |
1956 | 16 | return Reg; |
1957 | 68 | } |
1958 | 68 | return 0; |
1959 | 68 | } |
1960 | | |
1961 | | void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, |
1962 | 860 | SmallVectorImpl<unsigned> &VRegs) const { |
1963 | 860 | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
1964 | 860 | auto &HII = *HST.getInstrInfo(); |
1965 | 860 | auto &HRI = *HST.getRegisterInfo(); |
1966 | 860 | auto &MRI = MF.getRegInfo(); |
1967 | 860 | HexagonBlockRanges HBR(MF); |
1968 | 860 | |
1969 | 860 | using BlockIndexMap = |
1970 | 860 | std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>; |
1971 | 860 | using BlockRangeMap = |
1972 | 860 | std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>; |
1973 | 860 | using IndexType = HexagonBlockRanges::IndexType; |
1974 | 860 | |
1975 | 860 | struct SlotInfo { |
1976 | 860 | BlockRangeMap Map; |
1977 | 860 | unsigned Size = 0; |
1978 | 860 | const TargetRegisterClass *RC = nullptr; |
1979 | 860 | |
1980 | 282 | SlotInfo() = default; |
1981 | 860 | }; |
1982 | 860 | |
1983 | 860 | BlockIndexMap BlockIndexes; |
1984 | 860 | SmallSet<int,4> BadFIs; |
1985 | 860 | std::map<int,SlotInfo> FIRangeMap; |
1986 | 860 | |
1987 | 860 | // Accumulate register classes: get a common class for a pre-existing |
1988 | 860 | // class HaveRC and a new class NewRC. Return nullptr if a common class |
1989 | 860 | // cannot be found, otherwise return the resulting class. If HaveRC is |
1990 | 860 | // nullptr, assume that it is still unset. |
1991 | 860 | auto getCommonRC = |
1992 | 860 | [](const TargetRegisterClass *HaveRC, |
1993 | 471 | const TargetRegisterClass *NewRC) -> const TargetRegisterClass * { |
1994 | 471 | if (HaveRC == nullptr || 471 HaveRC == NewRC189 ) |
1995 | 468 | return NewRC; |
1996 | 3 | // Different classes, both non-null. Pick the more general one. |
1997 | 3 | if (3 HaveRC->hasSubClassEq(NewRC)3 ) |
1998 | 0 | return HaveRC; |
1999 | 3 | if (3 NewRC->hasSubClassEq(HaveRC)3 ) |
2000 | 0 | return NewRC; |
2001 | 3 | return nullptr; |
2002 | 3 | }; |
2003 | 860 | |
2004 | 860 | // Scan all blocks in the function. Check all occurrences of frame indexes, |
2005 | 860 | // and collect relevant information. |
2006 | 2.06k | for (auto &B : MF) { |
2007 | 2.06k | std::map<int,IndexType> LastStore, LastLoad; |
2008 | 2.06k | // Emplace appears not to be supported in gcc 4.7.2-4. |
2009 | 2.06k | //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B)); |
2010 | 2.06k | auto P = BlockIndexes.insert( |
2011 | 2.06k | std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B))); |
2012 | 2.06k | auto &IndexMap = P.first->second; |
2013 | 2.06k | DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n" |
2014 | 2.06k | << IndexMap << '\n'); |
2015 | 2.06k | |
2016 | 11.3k | for (auto &In : B) { |
2017 | 11.3k | int LFI, SFI; |
2018 | 169 | bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In); |
2019 | 302 | bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In); |
2020 | 11.3k | if (Load && 11.3k Store169 ) { |
2021 | 0 | // If it's both a load and a store, then we won't handle it. |
2022 | 0 | BadFIs.insert(LFI); |
2023 | 0 | BadFIs.insert(SFI); |
2024 | 0 | continue; |
2025 | 0 | } |
2026 | 11.3k | // Check for register classes of the register used as the source for |
2027 | 11.3k | // the store, and the register used as the destination for the load. |
2028 | 11.3k | // Also, only accept base+imm_offset addressing modes. Other addressing |
2029 | 11.3k | // modes can have side-effects (post-increments, etc.). For stack |
2030 | 11.3k | // slots they are very unlikely, so there is not much loss due to |
2031 | 11.3k | // this restriction. |
2032 | 11.3k | if (11.3k Load || 11.3k Store11.1k ) { |
2033 | 471 | int TFI = Load ? LFI169 : SFI302 ; |
2034 | 471 | unsigned AM = HII.getAddrMode(In); |
2035 | 471 | SlotInfo &SI = FIRangeMap[TFI]; |
2036 | 471 | bool Bad = (AM != HexagonII::BaseImmOffset); |
2037 | 471 | if (!Bad471 ) { |
2038 | 471 | // If the addressing mode is ok, check the register class. |
2039 | 471 | unsigned OpNum = Load ? 0169 : 2302 ; |
2040 | 471 | auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF); |
2041 | 471 | RC = getCommonRC(SI.RC, RC); |
2042 | 471 | if (RC == nullptr) |
2043 | 3 | Bad = true; |
2044 | 471 | else |
2045 | 468 | SI.RC = RC; |
2046 | 471 | } |
2047 | 471 | if (!Bad471 ) { |
2048 | 468 | // Check sizes. |
2049 | 468 | unsigned S = HII.getMemAccessSize(In); |
2050 | 468 | if (SI.Size != 0 && 468 SI.Size != S186 ) |
2051 | 0 | Bad = true; |
2052 | 468 | else |
2053 | 468 | SI.Size = S; |
2054 | 468 | } |
2055 | 471 | if (!Bad471 ) { |
2056 | 468 | for (auto *Mo : In.memoperands()) { |
2057 | 468 | if (!Mo->isVolatile()) |
2058 | 428 | continue; |
2059 | 40 | Bad = true; |
2060 | 40 | break; |
2061 | 40 | } |
2062 | 468 | } |
2063 | 471 | if (Bad) |
2064 | 43 | BadFIs.insert(TFI); |
2065 | 471 | } |
2066 | 11.3k | |
2067 | 11.3k | // Locate uses of frame indices. |
2068 | 45.4k | for (unsigned i = 0, n = In.getNumOperands(); i < n45.4k ; ++i34.1k ) { |
2069 | 34.1k | const MachineOperand &Op = In.getOperand(i); |
2070 | 34.1k | if (!Op.isFI()) |
2071 | 32.5k | continue; |
2072 | 1.65k | int FI = Op.getIndex(); |
2073 | 1.65k | // Make sure that the following operand is an immediate and that |
2074 | 1.65k | // it is 0. This is the offset in the stack object. |
2075 | 1.65k | if (i+1 >= n || 1.65k !In.getOperand(i+1).isImm()1.65k || |
2076 | 1.65k | In.getOperand(i+1).getImm() != 0) |
2077 | 1.04k | BadFIs.insert(FI); |
2078 | 1.65k | if (BadFIs.count(FI)) |
2079 | 1.18k | continue; |
2080 | 470 | |
2081 | 470 | IndexType Index = IndexMap.getIndex(&In); |
2082 | 470 | if (Load470 ) { |
2083 | 129 | if (LastStore[FI] == IndexType::None) |
2084 | 38 | LastStore[FI] = IndexType::Entry; |
2085 | 129 | LastLoad[FI] = Index; |
2086 | 470 | } else if (341 Store341 ) { |
2087 | 267 | HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; |
2088 | 267 | if (LastStore[FI] != IndexType::None) |
2089 | 20 | RL.add(LastStore[FI], LastLoad[FI], false, false); |
2090 | 247 | else if (247 LastLoad[FI] != IndexType::None247 ) |
2091 | 0 | RL.add(IndexType::Entry, LastLoad[FI], false, false); |
2092 | 267 | LastLoad[FI] = IndexType::None; |
2093 | 267 | LastStore[FI] = Index; |
2094 | 341 | } else { |
2095 | 74 | BadFIs.insert(FI); |
2096 | 74 | } |
2097 | 34.1k | } |
2098 | 11.3k | } |
2099 | 2.06k | |
2100 | 285 | for (auto &I : LastLoad) { |
2101 | 285 | IndexType LL = I.second; |
2102 | 285 | if (LL == IndexType::None) |
2103 | 169 | continue; |
2104 | 116 | auto &RL = FIRangeMap[I.first].Map[&B]; |
2105 | 116 | IndexType &LS = LastStore[I.first]; |
2106 | 116 | if (LS != IndexType::None) |
2107 | 116 | RL.add(LS, LL, false, false); |
2108 | 116 | else |
2109 | 0 | RL.add(IndexType::Entry, LL, false, false); |
2110 | 285 | LS = IndexType::None; |
2111 | 285 | } |
2112 | 285 | for (auto &I : LastStore) { |
2113 | 285 | IndexType LS = I.second; |
2114 | 285 | if (LS == IndexType::None) |
2115 | 116 | continue; |
2116 | 169 | auto &RL = FIRangeMap[I.first].Map[&B]; |
2117 | 169 | RL.add(LS, IndexType::None, false, false); |
2118 | 169 | } |
2119 | 2.06k | } |
2120 | 860 | |
2121 | 860 | DEBUG({ |
2122 | 860 | for (auto &P : FIRangeMap) { |
2123 | 860 | dbgs() << "fi#" << P.first; |
2124 | 860 | if (BadFIs.count(P.first)) |
2125 | 860 | dbgs() << " (bad)"; |
2126 | 860 | dbgs() << " RC: "; |
2127 | 860 | if (P.second.RC != nullptr) |
2128 | 860 | dbgs() << HRI.getRegClassName(P.second.RC) << '\n'; |
2129 | 860 | else |
2130 | 860 | dbgs() << "<null>\n"; |
2131 | 860 | for (auto &R : P.second.Map) |
2132 | 860 | dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n"; |
2133 | 860 | } |
2134 | 860 | }); |
2135 | 860 | |
2136 | 860 | // When a slot is loaded from in a block without being stored to in the |
2137 | 860 | // same block, it is live-on-entry to this block. To avoid CFG analysis, |
2138 | 860 | // consider this slot to be live-on-exit from all blocks. |
2139 | 860 | SmallSet<int,4> LoxFIs; |
2140 | 860 | |
2141 | 860 | std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap; |
2142 | 860 | |
2143 | 282 | for (auto &P : FIRangeMap) { |
2144 | 282 | // P = pair(FI, map: BB->RangeList) |
2145 | 282 | if (BadFIs.count(P.first)) |
2146 | 58 | continue; |
2147 | 224 | for (auto &B : MF) 224 { |
2148 | 1.30k | auto F = P.second.Map.find(&B); |
2149 | 1.30k | // F = pair(BB, RangeList) |
2150 | 1.30k | if (F == P.second.Map.end() || 1.30k F->second.empty()262 ) |
2151 | 1.04k | continue; |
2152 | 262 | HexagonBlockRanges::IndexRange &IR = F->second.front(); |
2153 | 262 | if (IR.start() == IndexType::Entry) |
2154 | 35 | LoxFIs.insert(P.first); |
2155 | 1.30k | BlockFIMap[&B].push_back(P.first); |
2156 | 1.30k | } |
2157 | 282 | } |
2158 | 860 | |
2159 | 860 | DEBUG({ |
2160 | 860 | dbgs() << "Block-to-FI map (* -- live-on-exit):\n"; |
2161 | 860 | for (auto &P : BlockFIMap) { |
2162 | 860 | auto &FIs = P.second; |
2163 | 860 | if (FIs.empty()) |
2164 | 860 | continue; |
2165 | 860 | dbgs() << " BB#" << P.first->getNumber() << ": {"; |
2166 | 860 | for (auto I : FIs) { |
2167 | 860 | dbgs() << " fi#" << I; |
2168 | 860 | if (LoxFIs.count(I)) |
2169 | 860 | dbgs() << '*'; |
2170 | 860 | } |
2171 | 860 | dbgs() << " }\n"; |
2172 | 860 | } |
2173 | 860 | }); |
2174 | 860 | |
2175 | | #ifndef NDEBUG |
2176 | | bool HasOptLimit = SpillOptMax.getPosition(); |
2177 | | #endif |
2178 | | |
2179 | 860 | // eliminate loads, when all loads eliminated, eliminate all stores. |
2180 | 2.06k | for (auto &B : MF) { |
2181 | 2.06k | auto F = BlockIndexes.find(&B); |
2182 | 2.06k | assert(F != BlockIndexes.end()); |
2183 | 2.06k | HexagonBlockRanges::InstrIndexMap &IM = F->second; |
2184 | 2.06k | HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); |
2185 | 2.06k | HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); |
2186 | 2.06k | DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n" |
2187 | 2.06k | << HexagonBlockRanges::PrintRangeMap(DM, HRI)); |
2188 | 2.06k | |
2189 | 262 | for (auto FI : BlockFIMap[&B]) { |
2190 | 262 | if (BadFIs.count(FI)) |
2191 | 0 | continue; |
2192 | 262 | DEBUG262 (dbgs() << "Working on fi#" << FI << '\n'); |
2193 | 262 | HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; |
2194 | 282 | for (auto &Range : RL) { |
2195 | 282 | DEBUG(dbgs() << "--Examining range:" << RL << '\n'); |
2196 | 282 | if (!IndexType::isInstr(Range.start()) || |
2197 | 247 | !IndexType::isInstr(Range.end())) |
2198 | 198 | continue; |
2199 | 84 | MachineInstr &SI = *IM.getInstr(Range.start()); |
2200 | 84 | MachineInstr &EI = *IM.getInstr(Range.end()); |
2201 | 84 | assert(SI.mayStore() && "Unexpected start instruction"); |
2202 | 84 | assert(EI.mayLoad() && "Unexpected end instruction"); |
2203 | 84 | MachineOperand &SrcOp = SI.getOperand(2); |
2204 | 84 | |
2205 | 84 | HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), |
2206 | 84 | SrcOp.getSubReg() }; |
2207 | 84 | auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF); |
2208 | 84 | // The this-> is needed to unconfuse MSVC. |
2209 | 84 | unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); |
2210 | 84 | DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n'); |
2211 | 84 | if (FoundR == 0) |
2212 | 68 | continue; |
2213 | | #ifndef NDEBUG |
2214 | | if (HasOptLimit) { |
2215 | | if (SpillOptCount >= SpillOptMax) |
2216 | | return; |
2217 | | SpillOptCount++; |
2218 | | } |
2219 | | #endif |
2220 | | |
2221 | 16 | // Generate the copy-in: "FoundR = COPY SrcR" at the store location. |
2222 | 16 | MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt; |
2223 | 16 | MachineInstr *CopyIn = nullptr; |
2224 | 16 | if (SrcRR.Reg != FoundR || 16 SrcRR.Sub != 00 ) { |
2225 | 16 | const DebugLoc &DL = SI.getDebugLoc(); |
2226 | 16 | CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR) |
2227 | 16 | .add(SrcOp); |
2228 | 16 | } |
2229 | 16 | |
2230 | 16 | ++StartIt; |
2231 | 16 | // Check if this is a last store and the FI is live-on-exit. |
2232 | 16 | if (LoxFIs.count(FI) && 16 (&Range == &RL.back())0 ) { |
2233 | 0 | // Update store's source register. |
2234 | 0 | if (unsigned SR = SrcOp.getSubReg()) |
2235 | 0 | SrcOp.setReg(HRI.getSubReg(FoundR, SR)); |
2236 | 0 | else |
2237 | 0 | SrcOp.setReg(FoundR); |
2238 | 0 | SrcOp.setSubReg(0); |
2239 | 0 | // We are keeping this register live. |
2240 | 0 | SrcOp.setIsKill(false); |
2241 | 16 | } else { |
2242 | 16 | B.erase(&SI); |
2243 | 16 | IM.replaceInstr(&SI, CopyIn); |
2244 | 16 | } |
2245 | 16 | |
2246 | 16 | auto EndIt = std::next(EI.getIterator()); |
2247 | 357 | for (auto It = StartIt; It != EndIt357 ; It = NextIt341 ) { |
2248 | 341 | MachineInstr &MI = *It; |
2249 | 341 | NextIt = std::next(It); |
2250 | 341 | int TFI; |
2251 | 341 | if (!HII.isLoadFromStackSlot(MI, TFI) || 341 TFI != FI38 ) |
2252 | 324 | continue; |
2253 | 17 | unsigned DstR = MI.getOperand(0).getReg(); |
2254 | 17 | assert(MI.getOperand(0).getSubReg() == 0); |
2255 | 17 | MachineInstr *CopyOut = nullptr; |
2256 | 17 | if (DstR != FoundR17 ) { |
2257 | 17 | DebugLoc DL = MI.getDebugLoc(); |
2258 | 17 | unsigned MemSize = HII.getMemAccessSize(MI); |
2259 | 17 | assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset); |
2260 | 17 | unsigned CopyOpc = TargetOpcode::COPY; |
2261 | 17 | if (HII.isSignExtendingLoad(MI)) |
2262 | 0 | CopyOpc = (MemSize == 1) ? 0 Hexagon::A2_sxtb0 : Hexagon::A2_sxth0 ; |
2263 | 17 | else if (17 HII.isZeroExtendingLoad(MI)17 ) |
2264 | 0 | CopyOpc = (MemSize == 1) ? 0 Hexagon::A2_zxtb0 : Hexagon::A2_zxth0 ; |
2265 | 17 | CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR) |
2266 | 17 | .addReg(FoundR, getKillRegState(&MI == &EI)); |
2267 | 17 | } |
2268 | 341 | IM.replaceInstr(&MI, CopyOut); |
2269 | 341 | B.erase(It); |
2270 | 341 | } |
2271 | 16 | |
2272 | 16 | // Update the dead map. |
2273 | 16 | HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 }; |
2274 | 16 | for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI)) |
2275 | 16 | DM[RR].subtract(Range); |
2276 | 282 | } // for Range in range list |
2277 | 262 | } |
2278 | 2.06k | } |
2279 | 860 | } |
2280 | | |
2281 | | void HexagonFrameLowering::expandAlloca(MachineInstr *AI, |
2282 | 3 | const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { |
2283 | 3 | MachineBasicBlock &MB = *AI->getParent(); |
2284 | 3 | DebugLoc DL = AI->getDebugLoc(); |
2285 | 3 | unsigned A = AI->getOperand(2).getImm(); |
2286 | 3 | |
2287 | 3 | // Have |
2288 | 3 | // Rd = alloca Rs, #A |
2289 | 3 | // |
2290 | 3 | // If Rs and Rd are different registers, use this sequence: |
2291 | 3 | // Rd = sub(r29, Rs) |
2292 | 3 | // r29 = sub(r29, Rs) |
2293 | 3 | // Rd = and(Rd, #-A) ; if necessary |
2294 | 3 | // r29 = and(r29, #-A) ; if necessary |
2295 | 3 | // Rd = add(Rd, #CF) ; CF size aligned to at most A |
2296 | 3 | // otherwise, do |
2297 | 3 | // Rd = sub(r29, Rs) |
2298 | 3 | // Rd = and(Rd, #-A) ; if necessary |
2299 | 3 | // r29 = Rd |
2300 | 3 | // Rd = add(Rd, #CF) ; CF size aligned to at most A |
2301 | 3 | |
2302 | 3 | MachineOperand &RdOp = AI->getOperand(0); |
2303 | 3 | MachineOperand &RsOp = AI->getOperand(1); |
2304 | 3 | unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); |
2305 | 3 | |
2306 | 3 | // Rd = sub(r29, Rs) |
2307 | 3 | BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) |
2308 | 3 | .addReg(SP) |
2309 | 3 | .addReg(Rs); |
2310 | 3 | if (Rs != Rd3 ) { |
2311 | 1 | // r29 = sub(r29, Rs) |
2312 | 1 | BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) |
2313 | 1 | .addReg(SP) |
2314 | 1 | .addReg(Rs); |
2315 | 1 | } |
2316 | 3 | if (A > 83 ) { |
2317 | 0 | // Rd = and(Rd, #-A) |
2318 | 0 | BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) |
2319 | 0 | .addReg(Rd) |
2320 | 0 | .addImm(-int64_t(A)); |
2321 | 0 | if (Rs != Rd) |
2322 | 0 | BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) |
2323 | 0 | .addReg(SP) |
2324 | 0 | .addImm(-int64_t(A)); |
2325 | 0 | } |
2326 | 3 | if (Rs == Rd3 ) { |
2327 | 2 | // r29 = Rd |
2328 | 2 | BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) |
2329 | 2 | .addReg(Rd); |
2330 | 2 | } |
2331 | 3 | if (CF > 03 ) { |
2332 | 1 | // Rd = add(Rd, #CF) |
2333 | 1 | BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) |
2334 | 1 | .addReg(Rd) |
2335 | 1 | .addImm(CF); |
2336 | 1 | } |
2337 | 3 | } |
2338 | | |
2339 | 3.79k | bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { |
2340 | 3.79k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2341 | 3.79k | if (!MFI.hasVarSizedObjects()) |
2342 | 3.78k | return false; |
2343 | 3 | unsigned MaxA = MFI.getMaxAlignment(); |
2344 | 3 | if (MaxA <= getStackAlignment()) |
2345 | 2 | return false; |
2346 | 1 | return true; |
2347 | 1 | } |
2348 | | |
2349 | | const MachineInstr *HexagonFrameLowering::getAlignaInstr( |
2350 | 2 | const MachineFunction &MF) const { |
2351 | 2 | for (auto &B : MF) |
2352 | 8 | for (auto &I : B) |
2353 | 29 | if (29 I.getOpcode() == Hexagon::PS_aligna29 ) |
2354 | 1 | return &I; |
2355 | 1 | return nullptr; |
2356 | 1 | } |
2357 | | |
2358 | | /// Adds all callee-saved registers as implicit uses or defs to the |
2359 | | /// instruction. |
2360 | | void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, |
2361 | 13 | const CSIVect &CSI, bool IsDef, bool IsKill) const { |
2362 | 13 | // Add the callee-saved registers as implicit uses. |
2363 | 13 | for (auto &R : CSI) |
2364 | 47 | MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill)); |
2365 | 13 | } |
2366 | | |
2367 | | /// Determine whether the callee-saved register saves and restores should |
2368 | | /// be generated via inline code. If this function returns "true", inline |
2369 | | /// code will be generated. If this function returns "false", additional |
2370 | | /// checks are performed, which may still lead to the inline code. |
2371 | | bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, |
2372 | 107 | const CSIVect &CSI) const { |
2373 | 107 | if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) |
2374 | 3 | return true; |
2375 | 104 | if (104 !hasFP(MF)104 ) |
2376 | 14 | return true; |
2377 | 90 | if (90 !isOptSize(MF) && 90 !isMinSize(MF)72 ) |
2378 | 66 | if (66 MF.getTarget().getOptLevel() > CodeGenOpt::Default66 ) |
2379 | 0 | return true; |
2380 | 90 | |
2381 | 90 | // Check if CSI only has double registers, and if the registers form |
2382 | 90 | // a contiguous block starting from D8. |
2383 | 90 | BitVector Regs(Hexagon::NUM_TARGET_REGS); |
2384 | 263 | for (unsigned i = 0, n = CSI.size(); i < n263 ; ++i173 ) { |
2385 | 173 | unsigned R = CSI[i].getReg(); |
2386 | 173 | if (!Hexagon::DoubleRegsRegClass.contains(R)) |
2387 | 0 | return true; |
2388 | 173 | Regs[R] = true; |
2389 | 173 | } |
2390 | 90 | int F = Regs.find_first(); |
2391 | 90 | if (F != Hexagon::D8) |
2392 | 0 | return true; |
2393 | 263 | while (90 F >= 0263 ) { |
2394 | 173 | int N = Regs.find_next(F); |
2395 | 173 | if (N >= 0 && 173 N != F+183 ) |
2396 | 0 | return true; |
2397 | 173 | F = N; |
2398 | 173 | } |
2399 | 90 | |
2400 | 90 | return false; |
2401 | 107 | } |
2402 | | |
2403 | | bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF, |
2404 | 54 | const CSIVect &CSI) const { |
2405 | 54 | if (shouldInlineCSR(MF, CSI)) |
2406 | 8 | return false; |
2407 | 46 | unsigned NumCSI = CSI.size(); |
2408 | 46 | if (NumCSI <= 1) |
2409 | 28 | return false; |
2410 | 18 | |
2411 | 18 | unsigned Threshold = isOptSize(MF) ? 18 SpillFuncThresholdOs5 |
2412 | 13 | : SpillFuncThreshold; |
2413 | 54 | return Threshold < NumCSI; |
2414 | 54 | } |
2415 | | |
2416 | | bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF, |
2417 | 53 | const CSIVect &CSI) const { |
2418 | 53 | if (shouldInlineCSR(MF, CSI)) |
2419 | 9 | return false; |
2420 | 44 | // The restore functions do a bit more than just restoring registers. |
2421 | 44 | // The non-returning versions will go back directly to the caller's |
2422 | 44 | // caller, others will clean up the stack frame in preparation for |
2423 | 44 | // a tail call. Using them can still save code size even if only one |
2424 | 44 | // register is getting restores. Make the decision based on -Oz: |
2425 | 44 | // using -Os will use inline restore for a single register. |
2426 | 44 | if (44 isMinSize(MF)44 ) |
2427 | 3 | return true; |
2428 | 41 | unsigned NumCSI = CSI.size(); |
2429 | 41 | if (NumCSI <= 1) |
2430 | 24 | return false; |
2431 | 17 | |
2432 | 17 | unsigned Threshold = isOptSize(MF) ? 17 SpillFuncThresholdOs-15 |
2433 | 12 | : SpillFuncThreshold; |
2434 | 53 | return Threshold < NumCSI; |
2435 | 53 | } |
2436 | | |
2437 | 2.39k | bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const { |
2438 | 2.39k | unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF); |
2439 | 2.39k | auto &HST = MF.getSubtarget<HexagonSubtarget>(); |
2440 | 2.39k | // A fairly simplistic guess as to whether a potential load/store to a |
2441 | 2.39k | // stack location could require an extra register. |
2442 | 2.39k | if (HST.useHVXOps() && 2.39k StackSize > 2561.20k ) |
2443 | 2 | return true; |
2444 | 2.39k | |
2445 | 2.39k | // Check if the function has store-immediate instructions that access |
2446 | 2.39k | // the stack. Since the offset field is not extendable, if the stack |
2447 | 2.39k | // size exceeds the offset limit (6 bits, shifted), the stores will |
2448 | 2.39k | // require a new base register. |
2449 | 2.39k | bool HasImmStack = false; |
2450 | 2.39k | unsigned MinLS = ~0u; // Log_2 of the memory access size. |
2451 | 2.39k | |
2452 | 3.54k | for (const MachineBasicBlock &B : MF) { |
2453 | 19.4k | for (const MachineInstr &MI : B) { |
2454 | 19.4k | unsigned LS = 0; |
2455 | 19.4k | switch (MI.getOpcode()) { |
2456 | 95 | case Hexagon::S4_storeirit_io: |
2457 | 95 | case Hexagon::S4_storeirif_io: |
2458 | 95 | case Hexagon::S4_storeiri_io: |
2459 | 95 | ++LS; |
2460 | 95 | LLVM_FALLTHROUGH; |
2461 | 98 | case Hexagon::S4_storeirht_io: |
2462 | 98 | case Hexagon::S4_storeirhf_io: |
2463 | 98 | case Hexagon::S4_storeirh_io: |
2464 | 98 | ++LS; |
2465 | 98 | LLVM_FALLTHROUGH; |
2466 | 102 | case Hexagon::S4_storeirbt_io: |
2467 | 102 | case Hexagon::S4_storeirbf_io: |
2468 | 102 | case Hexagon::S4_storeirb_io: |
2469 | 102 | if (MI.getOperand(0).isFI()) |
2470 | 29 | HasImmStack = true; |
2471 | 95 | MinLS = std::min(MinLS, LS); |
2472 | 95 | break; |
2473 | 2.39k | } |
2474 | 2.39k | } |
2475 | 3.54k | } |
2476 | 2.39k | |
2477 | 2.39k | if (2.39k HasImmStack2.39k ) |
2478 | 15 | return !isUInt<6>(StackSize >> MinLS); |
2479 | 2.38k | |
2480 | 2.38k | return false; |
2481 | 2.38k | } |