Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains the AArch64 implementation of TargetFrameLowering class.
10
//
11
// On AArch64, stack frames are structured as follows:
12
//
13
// The stack grows downward.
14
//
15
// All of the individual frame areas on the frame below are optional, i.e. it's
16
// possible to create a function so that the particular area isn't present
17
// in the frame.
18
//
19
// At function entry, the "frame" looks as follows:
20
//
21
// |                                   | Higher address
22
// |-----------------------------------|
23
// |                                   |
24
// | arguments passed on the stack     |
25
// |                                   |
26
// |-----------------------------------| <- sp
27
// |                                   | Lower address
28
//
29
//
30
// After the prologue has run, the frame has the following general structure.
31
// Note that this doesn't depict the case where a red-zone is used. Also,
32
// technically the last frame area (VLAs) doesn't get created until in the
33
// main function body, after the prologue is run. However, it's depicted here
34
// for completeness.
35
//
36
// |                                   | Higher address
37
// |-----------------------------------|
38
// |                                   |
39
// | arguments passed on the stack     |
40
// |                                   |
41
// |-----------------------------------|
42
// |                                   |
43
// | (Win64 only) varargs from reg     |
44
// |                                   |
45
// |-----------------------------------|
46
// |                                   |
47
// | prev_fp, prev_lr                  |
48
// | (a.k.a. "frame record")           |
49
// |-----------------------------------| <- fp(=x29)
50
// |                                   |
51
// | other callee-saved registers      |
52
// |                                   |
53
// |-----------------------------------|
54
// |.empty.space.to.make.part.below....|
55
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
56
// |.the.standard.16-byte.alignment....|  compile time; if present)
57
// |-----------------------------------|
58
// |                                   |
59
// | local variables of fixed size     |
60
// | including spill slots             |
61
// |-----------------------------------| <- bp(not defined by ABI,
62
// |.variable-sized.local.variables....|       LLVM chooses X19)
63
// |.(VLAs)............................| (size of this area is unknown at
64
// |...................................|  compile time)
65
// |-----------------------------------| <- sp
66
// |                                   | Lower address
67
//
68
//
69
// To access the data in a frame, at-compile time, a constant offset must be
70
// computable from one of the pointers (fp, bp, sp) to access it. The size
71
// of the areas with a dotted background cannot be computed at compile-time
72
// if they are present, making it required to have all three of fp, bp and
73
// sp to be set up to be able to access all contents in the frame areas,
74
// assuming all of the frame areas are non-empty.
75
//
76
// For most functions, some of the frame areas are empty. For those functions,
77
// it may not be necessary to set up fp or bp:
78
// * A base pointer is definitely needed when there are both VLAs and local
79
//   variables with more-than-default alignment requirements.
80
// * A frame pointer is definitely needed when there are local variables with
81
//   more-than-default alignment requirements.
82
//
83
// In some cases when a base pointer is not strictly needed, it is generated
84
// anyway when offsets from the frame pointer to access local variables become
85
// so large that the offset can't be encoded in the immediate fields of loads
86
// or stores.
87
//
88
// FIXME: also explain the redzone concept.
89
// FIXME: also explain the concept of reserved call frames.
90
//
91
//===----------------------------------------------------------------------===//
92
93
#include "AArch64FrameLowering.h"
94
#include "AArch64InstrInfo.h"
95
#include "AArch64MachineFunctionInfo.h"
96
#include "AArch64RegisterInfo.h"
97
#include "AArch64Subtarget.h"
98
#include "AArch64TargetMachine.h"
99
#include "MCTargetDesc/AArch64AddressingModes.h"
100
#include "llvm/ADT/ScopeExit.h"
101
#include "llvm/ADT/SmallVector.h"
102
#include "llvm/ADT/Statistic.h"
103
#include "llvm/CodeGen/LivePhysRegs.h"
104
#include "llvm/CodeGen/MachineBasicBlock.h"
105
#include "llvm/CodeGen/MachineFrameInfo.h"
106
#include "llvm/CodeGen/MachineFunction.h"
107
#include "llvm/CodeGen/MachineInstr.h"
108
#include "llvm/CodeGen/MachineInstrBuilder.h"
109
#include "llvm/CodeGen/MachineMemOperand.h"
110
#include "llvm/CodeGen/MachineModuleInfo.h"
111
#include "llvm/CodeGen/MachineOperand.h"
112
#include "llvm/CodeGen/MachineRegisterInfo.h"
113
#include "llvm/CodeGen/RegisterScavenging.h"
114
#include "llvm/CodeGen/TargetInstrInfo.h"
115
#include "llvm/CodeGen/TargetRegisterInfo.h"
116
#include "llvm/CodeGen/TargetSubtargetInfo.h"
117
#include "llvm/CodeGen/WinEHFuncInfo.h"
118
#include "llvm/IR/Attributes.h"
119
#include "llvm/IR/CallingConv.h"
120
#include "llvm/IR/DataLayout.h"
121
#include "llvm/IR/DebugLoc.h"
122
#include "llvm/IR/Function.h"
123
#include "llvm/MC/MCAsmInfo.h"
124
#include "llvm/MC/MCDwarf.h"
125
#include "llvm/Support/CommandLine.h"
126
#include "llvm/Support/Debug.h"
127
#include "llvm/Support/ErrorHandling.h"
128
#include "llvm/Support/MathExtras.h"
129
#include "llvm/Support/raw_ostream.h"
130
#include "llvm/Target/TargetMachine.h"
131
#include "llvm/Target/TargetOptions.h"
132
#include <cassert>
133
#include <cstdint>
134
#include <iterator>
135
#include <vector>
136
137
using namespace llvm;
138
139
#define DEBUG_TYPE "frame-info"
140
141
// Off by default; the red zone is only safe when nothing asynchronous
// (signals, interrupts) can clobber memory below SP.
static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

// Debugging/testing knob: restore callee-saves in reverse order in epilogues.
static cl::opt<bool>
    ReverseCSRRestoreSeq("reverse-csr-restore-seq",
                         cl::desc("reverse the CSR restore sequence"),
                         cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = 255;
157
158
/// Look at each instruction that references stack frames and return the stack
159
/// size limit beyond which some of these instructions will require a scratch
160
/// register during their expansion later.
161
279k
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      // Debug values, pseudos and plain ADD(S) immediates never need a
      // scratch register for frame-index expansion, so skip them.
      const unsigned Opcode = Inst.getOpcode();
      if (Inst.isDebugInstr() || Inst.isPseudo() ||
          Opcode == AArch64::ADDXri || Opcode == AArch64::ADDSXri)
        continue;

      // Any frame-index operand whose offset cannot be folded at all forces
      // the most conservative answer.
      for (const MachineOperand &Op : Inst.operands()) {
        if (!Op.isFI())
          continue;

        int Offset = 0;
        if (isAArch64FrameOffsetLegal(Inst, Offset, nullptr, nullptr,
                                      nullptr) == AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }

  return DefaultSafeSPDisplacement;
}
185
186
787k
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
187
787k
  if (!EnableRedZone)
188
787k
    return false;
189
94
  // Don't use the red zone if the function explicitly asks us not to.
190
94
  // This is typically used for kernel code.
191
94
  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
192
0
    return false;
193
94
194
94
  const MachineFrameInfo &MFI = MF.getFrameInfo();
195
94
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
196
94
  unsigned NumBytes = AFI->getLocalStackSize();
197
94
198
94
  return !(MFI.hasCalls() || 
hasFP(MF)88
||
NumBytes > 12888
);
199
94
}
200
201
/// hasFP - Return true if the specified function should have a dedicated frame
202
/// pointer register.
203
17.2M
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
204
17.2M
  const MachineFrameInfo &MFI = MF.getFrameInfo();
205
17.2M
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
206
17.2M
  // Win64 EH requires a frame pointer if funclets are present, as the locals
207
17.2M
  // are accessed off the frame pointer in both the parent function and the
208
17.2M
  // funclets.
209
17.2M
  if (MF.hasEHFunclets())
210
850
    return true;
211
17.2M
  // Retain behavior of always omitting the FP for leaf functions when possible.
212
17.2M
  if (MFI.hasCalls() && 
MF.getTarget().Options.DisableFramePointerElim(MF)13.6M
)
213
12.7M
    return true;
214
4.56M
  if (MFI.hasVarSizedObjects() || 
MFI.isFrameAddressTaken()4.56M
||
215
4.56M
      
MFI.hasStackMap()4.52M
||
MFI.hasPatchPoint()4.52M
||
216
4.56M
      
RegInfo->needsStackRealignment(MF)4.52M
)
217
44.5k
    return true;
218
4.52M
  // With large callframes around we may need to use FP to access the scavenging
219
4.52M
  // emergency spillslot.
220
4.52M
  //
221
4.52M
  // Unfortunately some calls to hasFP() like machine verifier ->
222
4.52M
  // getReservedReg() -> hasFP in the middle of global isel are too early
223
4.52M
  // to know the max call frame size. Hopefully conservatively returning "true"
224
4.52M
  // in those cases is fine.
225
4.52M
  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
226
4.52M
  if (!MFI.isMaxCallFrameSizeComputed() ||
227
4.52M
      
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement1.74M
)
228
2.77M
    return true;
229
1.74M
230
1.74M
  return false;
231
1.74M
}
232
233
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
234
/// not required, we reserve argument space for call sites in the function
235
/// immediately on entry to the current function.  This eliminates the need for
236
/// add/sub sp brackets around call sites.  Returns true if the call frame is
237
/// included as part of the stack frame.
238
bool
239
5.38M
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
240
5.38M
  return !MF.getFrameInfo().hasVarSizedObjects();
241
5.38M
}
242
243
/// Replace an ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo at \p I with real SP
/// adjustments (when the call frame is not reserved, or when the callee pops
/// its own arguments) and erase the pseudo.
/// \returns an iterator past the erased instruction.
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  // Only the destroy pseudo carries the callee-popped byte count (operand 1).
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    // Operand 0 is the raw adjustment size; round it up to the stack
    // alignment, and negate it for the setup (allocation) direction.
    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      // LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  // Remove the pseudo; any real adjustment was inserted before it.
  return MBB.erase(I);
}
287
288
623k
// Decide whether the return address of this function must be signed.
// Signing happens when:
//   - sign-return-address=all, or
//   - sign-return-address=non-leaf and the function spills LR.
static bool ShouldSignReturnAddress(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  if (!F.hasFnAttribute("sign-return-address"))
    return false;

  const StringRef Scope =
      F.getFnAttribute("sign-return-address").getValueAsString();
  if (Scope.equals("none"))
    return false;
  if (Scope.equals("all"))
    return true;

  assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");

  // "non-leaf": sign only if LR is among the callee-saved spills.
  for (const auto &CS : MF.getFrameInfo().getCalleeSavedInfo())
    if (CS.getReg() == AArch64::LR)
      return true;

  return false;
}
312
313
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
314
31.9k
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
315
31.9k
  MachineFunction &MF = *MBB.getParent();
316
31.9k
  MachineFrameInfo &MFI = MF.getFrameInfo();
317
31.9k
  const TargetSubtargetInfo &STI = MF.getSubtarget();
318
31.9k
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
319
31.9k
  const TargetInstrInfo *TII = STI.getInstrInfo();
320
31.9k
  DebugLoc DL = MBB.findDebugLoc(MBBI);
321
31.9k
322
31.9k
  // Add callee saved registers to move list.
323
31.9k
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
324
31.9k
  if (CSI.empty())
325
0
    return;
326
31.9k
327
225k
  
for (const auto &Info : CSI)31.9k
{
328
225k
    unsigned Reg = Info.getReg();
329
225k
    int64_t Offset =
330
225k
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
331
225k
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
332
225k
    unsigned CFIIndex = MF.addFrameInst(
333
225k
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
334
225k
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
335
225k
        .addCFIIndex(CFIIndex)
336
225k
        .setMIFlags(MachineInstr::FrameSetup);
337
225k
  }
338
31.9k
}
339
340
// Find a scratch register that we can use at the start of the prologue to
341
// re-align the stack pointer.  We avoid using callee-save registers since they
342
// may appear to be free when this is called from canUseAsPrologue (during
343
// shrink wrapping), but then no longer be free when this is called from
344
// emitPrologue.
345
//
346
// FIXME: This is a bit conservative, since in the above case we could use one
347
// of the callee-save registers as a scratch temp to re-align the stack pointer,
348
// but we would then have to make sure that we were in fact saving at least one
349
// callee-save register in the prologue, which is additional complexity that
350
// doesn't seem worth the benefit.
351
55
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // In the entry block everything caller-clobbered is dead, so X9 is always
  // usable there.
  if (MBB == &MF->front())
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (const MCPhysReg *R = CSRegs; *R; ++R)
    LiveRegs.addReg(*R);

  const MachineRegisterInfo &MRI = MF->getRegInfo();

  // Prefer X9 since it was historically used for the prologue scratch reg.
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  // Otherwise take any free GPR64; give up if none exists.
  for (unsigned Reg : AArch64::GPR64RegClass)
    if (LiveRegs.available(MRI, Reg))
      return Reg;

  return AArch64::NoRegister;
}
379
380
bool AArch64FrameLowering::canUseAsPrologue(
381
3.61k
    const MachineBasicBlock &MBB) const {
382
3.61k
  const MachineFunction *MF = MBB.getParent();
383
3.61k
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
384
3.61k
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
385
3.61k
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
386
3.61k
387
3.61k
  // Don't need a scratch register if we're not going to re-align the stack.
388
3.61k
  if (!RegInfo->needsStackRealignment(*MF))
389
3.61k
    return true;
390
4
  // Otherwise, we can use any block as long as it has a scratch register
391
4
  // available.
392
4
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
393
4
}
394
395
static bool windowsRequiresStackProbe(MachineFunction &MF,
396
452k
                                      unsigned StackSizeInBytes) {
397
452k
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
398
452k
  if (!Subtarget.isTargetWindows())
399
452k
    return false;
400
375
  const Function &F = MF.getFunction();
401
375
  // TODO: When implementing stack protectors, take that into account
402
375
  // for the probe threshold.
403
375
  unsigned StackProbeSize = 4096;
404
375
  if (F.hasFnAttribute("stack-probe-size"))
405
0
    F.getFnAttribute("stack-probe-size")
406
0
        .getValueAsString()
407
0
        .getAsInteger(0, StackProbeSize);
408
375
  return (StackSizeInBytes >= StackProbeSize) &&
409
375
         
!F.hasFnAttribute("no-stack-arg-probe")12
;
410
375
}
411
412
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
413
557k
    MachineFunction &MF, unsigned StackBumpBytes) const {
414
557k
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
415
557k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
416
557k
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
417
557k
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
418
557k
419
557k
  if (AFI->getLocalStackSize() == 0)
420
433k
    return false;
421
124k
422
124k
  // 512 is the maximum immediate for stp/ldp that will be used for
423
124k
  // callee-save save/restores
424
124k
  if (StackBumpBytes >= 512 || 
windowsRequiresStackProbe(MF, StackBumpBytes)103k
)
425
21.0k
    return false;
426
103k
427
103k
  if (MFI.hasVarSizedObjects())
428
128
    return false;
429
103k
430
103k
  if (RegInfo->needsStackRealignment(MF))
431
32
    return false;
432
103k
433
103k
  // This isn't strictly necessary, but it simplifies things a bit since the
434
103k
  // current RedZone handling code assumes the SP is adjusted by the
435
103k
  // callee-save save/restore code.
436
103k
  if (canUseRedZone(MF))
437
5
    return false;
438
103k
439
103k
  return true;
440
103k
}
441
442
// Given a load or a store instruction, generate an appropriate unwinding SEH
443
// code on Windows.
444
/// Insert the SEH unwind pseudo-instruction matching the save/restore at
/// \p MBBI, flagged with \p Flag (FrameSetup or FrameDestroy), directly after
/// it. \returns an iterator to the inserted SEH instruction.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             MachineInstr::MIFlag Flag) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  // The immediate offset is always the last operand of the load/store.
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();
  MachineInstrBuilder MIB;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // For each post-increment load the offset is negated and the matching
  // pre-increment store case is reused via fallthrough, since the SEH opcode
  // describes the prologue-direction adjustment in both cases.
  switch (Opc) {
  default:
    llvm_unreachable("No SEH Opcode for this instruction");
  case AArch64::LDPDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPDpre: {
    // Pair of FP regs with writeback: operands 1/2 are the data registers.
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDPXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPXpre: {
    unsigned Reg0 = MBBI->getOperand(1).getReg();
    unsigned Reg1 = MBBI->getOperand(2).getReg();
    // The FP/LR pair has a dedicated compact SEH opcode.
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRXpre: {
    unsigned Reg =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  // The non-writeback forms below keep the immediate as-is; the data
  // registers start at operand 0.
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 =  RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    unsigned Reg0 = MBBI->getOperand(0).getReg();
    unsigned Reg1 = MBBI->getOperand(1).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  }
  // Place the SEH pseudo immediately after the instruction it describes.
  auto I = MBB->insertAfter(MBBI, MIB);
  return I;
}
562
563
// Fix up the SEH opcode associated with the save/restore instruction.
564
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
565
84
                           unsigned LocalStackSize) {
566
84
  MachineOperand *ImmOpnd = nullptr;
567
84
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
568
84
  switch (MBBI->getOpcode()) {
569
84
  default:
570
0
    llvm_unreachable("Fix the offset in the SEH instruction");
571
84
  case AArch64::SEH_SaveFPLR:
572
84
  case AArch64::SEH_SaveRegP:
573
84
  case AArch64::SEH_SaveReg:
574
84
  case AArch64::SEH_SaveFRegP:
575
84
  case AArch64::SEH_SaveFReg:
576
84
    ImmOpnd = &MBBI->getOperand(ImmIdx);
577
84
    break;
578
84
  }
579
84
  if (ImmOpnd)
580
84
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
581
84
}
582
583
// Convert callee-save register save/restore instruction to do stack pointer
584
// decrement/increment to allocate/deallocate the callee-save stack area by
585
// converting store/load to use pre/post increment version.
586
/// Rewrite the first/last callee-save store/load at \p MBBI into its SP
/// pre/post-indexed form so that it also performs the callee-save area
/// allocation/deallocation of \p CSStackSizeInc bytes. The original
/// instruction is erased; \returns an iterator to the replacement.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions and associated CFI instruction.
  while (MBBI->getOpcode() == AArch64::STRXpost ||
         MBBI->getOpcode() == AArch64::LDRXpre ||
         MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
    if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
      assert(MBBI->getOperand(0).getReg() != AArch64::SP);
    ++MBBI;
  }
  // Map the plain opcode to its writeback twin; Scale converts the byte
  // adjustment into the scaled immediate the new opcode expects.
  unsigned NewOpc;
  int Scale = 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    Scale = 8;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    Scale = 8;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    Scale = 16;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    Scale = 8;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    Scale = 8;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    Scale = 16;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  // Writeback forms additionally define SP as the updated base.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / Scale);

  // Preserve the original flags and memory operands on the replacement.
  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    *HasWinCFI = true;
    InsertSEH(*MIB, *TII,
              InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
  }

  return std::prev(MBB.erase(MBBI));
}
683
684
// Fixup callee-save register save/restore instructions to take into account
685
// combined SP bump by adding the local stack size to the stack offsets.
686
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
687
                                              unsigned LocalStackSize,
688
                                              bool NeedsWinCFI,
689
317k
                                              bool *HasWinCFI) {
690
317k
  if (AArch64InstrInfo::isSEHInstruction(MI))
691
84
    return;
692
317k
693
317k
  unsigned Opc = MI.getOpcode();
694
317k
695
317k
  // Ignore instructions that do not operate on SP, i.e. shadow call stack
696
317k
  // instructions and associated CFI instruction.
697
317k
  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
698
317k
      Opc == AArch64::CFI_INSTRUCTION) {
699
0
    if (Opc != AArch64::CFI_INSTRUCTION)
700
0
      assert(MI.getOperand(0).getReg() != AArch64::SP);
701
0
    return;
702
0
  }
703
317k
704
317k
  unsigned Scale;
705
317k
  switch (Opc) {
706
317k
  case AArch64::STPXi:
707
317k
  case AArch64::STRXui:
708
317k
  case AArch64::STPDi:
709
317k
  case AArch64::STRDui:
710
317k
  case AArch64::LDPXi:
711
317k
  case AArch64::LDRXui:
712
317k
  case AArch64::LDPDi:
713
317k
  case AArch64::LDRDui:
714
317k
    Scale = 8;
715
317k
    break;
716
317k
  case AArch64::STPQi:
717
8
  case AArch64::STRQui:
718
8
  case AArch64::LDPQi:
719
8
  case AArch64::LDRQui:
720
8
    Scale = 16;
721
8
    break;
722
8
  default:
723
0
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
724
317k
  }
725
317k
726
317k
  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
727
317k
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
728
317k
         "Unexpected base register in callee-save save/restore instruction!");
729
317k
  // Last operand is immediate offset that needs fixing.
730
317k
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
731
317k
  // All generated opcodes have scaled offsets.
732
317k
  assert(LocalStackSize % Scale == 0);
733
317k
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
734
317k
735
317k
  if (NeedsWinCFI) {
736
84
    *HasWinCFI = true;
737
84
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
738
84
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
739
84
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
740
84
           "Expecting a SEH instruction");
741
84
    fixupSEHOpcode(MBBI, LocalStackSize);
742
84
  }
743
317k
}
744
745
static void adaptForLdStOpt(MachineBasicBlock &MBB,
746
                            MachineBasicBlock::iterator FirstSPPopI,
747
130k
                            MachineBasicBlock::iterator LastPopI) {
748
130k
  // Sometimes (when we restore in the same order as we save), we can end up
749
130k
  // with code like this:
750
130k
  //
751
130k
  // ldp      x26, x25, [sp]
752
130k
  // ldp      x24, x23, [sp, #16]
753
130k
  // ldp      x22, x21, [sp, #32]
754
130k
  // ldp      x20, x19, [sp, #48]
755
130k
  // add      sp, sp, #64
756
130k
  //
757
130k
  // In this case, it is always better to put the first ldp at the end, so
758
130k
  // that the load-store optimizer can run and merge the ldp and the add into
759
130k
  // a post-index ldp.
760
130k
  // If we managed to grab the first pop instruction, move it to the end.
761
130k
  if (ReverseCSRRestoreSeq)
762
4
    MBB.splice(FirstSPPopI, &MBB, LastPopI);
763
130k
  // We should end up with something like this now:
764
130k
  //
765
130k
  // ldp      x24, x23, [sp, #16]
766
130k
  // ldp      x22, x21, [sp, #32]
767
130k
  // ldp      x20, x19, [sp, #48]
768
130k
  // ldp      x26, x25, [sp]
769
130k
  // add      sp, sp, #64
770
130k
  //
771
130k
  // and the load-store optimizer can merge the last two instructions into:
772
130k
  //
773
130k
  // ldp      x26, x25, [sp], #64
774
130k
  //
775
130k
}
776
777
30
static bool ShouldSignWithAKey(MachineFunction &MF) {
778
30
  const Function &F = MF.getFunction();
779
30
  if (!F.hasFnAttribute("sign-return-address-key"))
780
16
    return true;
781
14
782
14
  const StringRef Key =
783
14
      F.getFnAttribute("sign-return-address-key").getValueAsString();
784
14
  assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
785
14
  return Key.equals_lower("a_key");
786
14
}
787
788
1.52M
static bool needsWinCFI(const MachineFunction &MF) {
789
1.52M
  const Function &F = MF.getFunction();
790
1.52M
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
791
1.52M
         
F.needsUnwindTableEntry()693
;
792
1.52M
}
793
794
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
795
257k
                                        MachineBasicBlock &MBB) const {
796
257k
  MachineBasicBlock::iterator MBBI = MBB.begin();
797
257k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
798
257k
  const Function &F = MF.getFunction();
799
257k
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
800
257k
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
801
257k
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
802
257k
  MachineModuleInfo &MMI = MF.getMMI();
803
257k
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
804
257k
  bool needsFrameMoves = (MMI.hasDebugInfo() || 
F.needsUnwindTableEntry()251k
) &&
805
257k
                         
!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()49.5k
;
806
257k
  bool HasFP = hasFP(MF);
807
257k
  bool NeedsWinCFI = needsWinCFI(MF);
808
257k
  bool HasWinCFI = false;
809
257k
  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
810
257k
811
257k
  bool IsFunclet = MBB.isEHFuncletEntry();
812
257k
813
257k
  // At this point, we're going to decide whether or not the function uses a
814
257k
  // redzone. In most cases, the function doesn't have a redzone so let's
815
257k
  // assume that's false and set it to true in the case that there's a redzone.
816
257k
  AFI->setHasRedZone(false);
817
257k
818
257k
  // Debug location must be unknown since the first debug location is used
819
257k
  // to determine the end of the prologue.
820
257k
  DebugLoc DL;
821
257k
822
257k
  if (ShouldSignReturnAddress(MF)) {
823
15
    if (ShouldSignWithAKey(MF))
824
10
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
825
10
          .setMIFlag(MachineInstr::FrameSetup);
826
5
    else {
827
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
828
5
          .setMIFlag(MachineInstr::FrameSetup);
829
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
830
5
          .setMIFlag(MachineInstr::FrameSetup);
831
5
    }
832
15
833
15
    unsigned CFIIndex =
834
15
        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
835
15
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
836
15
        .addCFIIndex(CFIIndex)
837
15
        .setMIFlags(MachineInstr::FrameSetup);
838
15
  }
839
257k
840
257k
  // All calls are tail calls in GHC calling conv, and functions have no
841
257k
  // prologue/epilogue.
842
257k
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
843
6
    return;
844
257k
845
257k
  // Set tagged base pointer to the bottom of the stack frame.
846
257k
  // Ideally it should match SP value after prologue.
847
257k
  AFI->setTaggedBasePointerOffset(MFI.getStackSize());
848
257k
849
257k
  // getStackSize() includes all the locals in its size calculation. We don't
850
257k
  // include these locals when computing the stack size of a funclet, as they
851
257k
  // are allocated in the parent's stack frame and accessed via the frame
852
257k
  // pointer from the funclet.  We only save the callee saved registers in the
853
257k
  // funclet, which are really the callee saved registers of the parent
854
257k
  // function, including the funclet.
855
257k
  int NumBytes = IsFunclet ? 
(int)getWinEHFuncletFrameSize(MF)12
856
257k
                           : 
(int)MFI.getStackSize()257k
;
857
257k
  if (!AFI->hasStackFrame() && 
!windowsRequiresStackProbe(MF, NumBytes)65.9k
) {
858
65.9k
    assert(!HasFP && "unexpected function without stack frame but with FP");
859
65.9k
    // All of the stack allocation is for locals.
860
65.9k
    AFI->setLocalStackSize(NumBytes);
861
65.9k
    if (!NumBytes)
862
65.6k
      return;
863
284
    // REDZONE: If the stack size is less than 128 bytes, we don't need
864
284
    // to actually allocate.
865
284
    if (canUseRedZone(MF)) {
866
3
      AFI->setHasRedZone(true);
867
3
      ++NumRedZoneFunctions;
868
281
    } else {
869
281
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
870
281
                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
871
281
      if (!NeedsWinCFI) {
872
274
        // Label used to tie together the PROLOG_LABEL and the MachineMoves.
873
274
        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
874
274
        // Encode the stack size of the leaf function.
875
274
        unsigned CFIIndex = MF.addFrameInst(
876
274
            MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
877
274
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
878
274
            .addCFIIndex(CFIIndex)
879
274
            .setMIFlags(MachineInstr::FrameSetup);
880
274
      }
881
281
    }
882
284
883
284
    if (NeedsWinCFI) {
884
7
      HasWinCFI = true;
885
7
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
886
7
          .setMIFlag(MachineInstr::FrameSetup);
887
7
    }
888
284
889
284
    return;
890
284
  }
891
191k
892
191k
  bool IsWin64 =
893
191k
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
894
191k
  // Var args are accounted for in the containing function, so don't
895
191k
  // include them for funclets.
896
191k
  unsigned FixedObject = (IsWin64 && 
!IsFunclet90
) ?
897
191k
                         
alignTo(AFI->getVarArgsGPRSize(), 16)78
: 0;
898
191k
899
191k
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
900
191k
  // All of the remaining stack allocations are for locals.
901
191k
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
902
191k
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
903
191k
  if (CombineSPBump) {
904
33.8k
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
905
33.8k
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
906
33.8k
    NumBytes = 0;
907
158k
  } else if (PrologueSaveSize != 0) {
908
158k
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
909
158k
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
910
158k
    NumBytes -= PrologueSaveSize;
911
158k
  }
912
191k
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
913
191k
914
191k
  // Move past the saves of the callee-saved registers, fixing up the offsets
915
191k
  // and pre-inc if we decided to combine the callee-save and local stack
916
191k
  // pointer bump above.
917
191k
  MachineBasicBlock::iterator End = MBB.end();
918
815k
  while (MBBI != End && 
MBBI->getFlag(MachineInstr::FrameSetup)815k
) {
919
623k
    if (CombineSPBump)
920
132k
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
921
132k
                                        NeedsWinCFI, &HasWinCFI);
922
623k
    ++MBBI;
923
623k
  }
924
191k
925
191k
  // The code below is not applicable to funclets. We have emitted all the SEH
926
191k
  // opcodes that we needed to emit.  The FP and BP belong to the containing
927
191k
  // function.
928
191k
  if (IsFunclet) {
929
12
    if (NeedsWinCFI) {
930
12
      HasWinCFI = true;
931
12
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
932
12
          .setMIFlag(MachineInstr::FrameSetup);
933
12
    }
934
12
935
12
    // SEH funclets are passed the frame pointer in X1.  If the parent
936
12
    // function uses the base register, then the base register is used
937
12
    // directly, and is not retrieved from X1.
938
12
    if (F.hasPersonalityFn()) {
939
12
      EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
940
12
      if (isAsynchronousEHPersonality(Per)) {
941
5
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
942
5
            .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
943
5
        MBB.addLiveIn(AArch64::X1);
944
5
      }
945
12
    }
946
12
947
12
    return;
948
12
  }
949
191k
950
191k
  if (HasFP) {
951
183k
    // Only set up FP if we actually need to. Frame pointer is fp =
952
183k
    // sp - fixedobject - 16.
953
183k
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
954
183k
    if (CombineSPBump)
955
27.5k
      FPOffset += AFI->getLocalStackSize();
956
183k
957
183k
    // Issue    sub fp, sp, FPOffset or
958
183k
    //          mov fp,sp          when FPOffset is zero.
959
183k
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
960
183k
    // This code marks the instruction(s) that set the FP also.
961
183k
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
962
183k
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
963
183k
  }
964
191k
965
191k
  if (windowsRequiresStackProbe(MF, NumBytes)) {
966
5
    uint32_t NumWords = NumBytes >> 4;
967
5
    if (NeedsWinCFI) {
968
5
      HasWinCFI = true;
969
5
      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
970
5
      // exceed this amount.  We need to move at most 2^24 - 1 into x15.
971
5
      // This is at most two instructions, MOVZ follwed by MOVK.
972
5
      // TODO: Fix to use multiple stack alloc unwind codes for stacks
973
5
      // exceeding 256MB in size.
974
5
      if (NumBytes >= (1 << 28))
975
0
        report_fatal_error("Stack size cannot exceed 256MB for stack "
976
0
                            "unwinding purposes");
977
5
978
5
      uint32_t LowNumWords = NumWords & 0xFFFF;
979
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
980
5
            .addImm(LowNumWords)
981
5
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
982
5
            .setMIFlag(MachineInstr::FrameSetup);
983
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
984
5
            .setMIFlag(MachineInstr::FrameSetup);
985
5
      if ((NumWords & 0xFFFF0000) != 0) {
986
1
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
987
1
              .addReg(AArch64::X15)
988
1
              .addImm((NumWords & 0xFFFF0000) >> 16) // High half
989
1
              .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
990
1
              .setMIFlag(MachineInstr::FrameSetup);
991
1
          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
992
1
            .setMIFlag(MachineInstr::FrameSetup);
993
1
      }
994
5
    } else {
995
0
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
996
0
          .addImm(NumWords)
997
0
          .setMIFlags(MachineInstr::FrameSetup);
998
0
    }
999
5
1000
5
    switch (MF.getTarget().getCodeModel()) {
1001
5
    case CodeModel::Tiny:
1002
3
    case CodeModel::Small:
1003
3
    case CodeModel::Medium:
1004
3
    case CodeModel::Kernel:
1005
3
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1006
3
          .addExternalSymbol("__chkstk")
1007
3
          .addReg(AArch64::X15, RegState::Implicit)
1008
3
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1009
3
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1010
3
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1011
3
          .setMIFlags(MachineInstr::FrameSetup);
1012
3
      if (NeedsWinCFI) {
1013
3
        HasWinCFI = true;
1014
3
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1015
3
            .setMIFlag(MachineInstr::FrameSetup);
1016
3
      }
1017
3
      break;
1018
3
    case CodeModel::Large:
1019
2
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1020
2
          .addReg(AArch64::X16, RegState::Define)
1021
2
          .addExternalSymbol("__chkstk")
1022
2
          .addExternalSymbol("__chkstk")
1023
2
          .setMIFlags(MachineInstr::FrameSetup);
1024
2
      if (NeedsWinCFI) {
1025
2
        HasWinCFI = true;
1026
2
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1027
2
            .setMIFlag(MachineInstr::FrameSetup);
1028
2
      }
1029
2
1030
2
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
1031
2
          .addReg(AArch64::X16, RegState::Kill)
1032
2
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
1033
2
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1034
2
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1035
2
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1036
2
          .setMIFlags(MachineInstr::FrameSetup);
1037
2
      if (NeedsWinCFI) {
1038
2
        HasWinCFI = true;
1039
2
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1040
2
            .setMIFlag(MachineInstr::FrameSetup);
1041
2
      }
1042
2
      break;
1043
5
    }
1044
5
1045
5
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1046
5
        .addReg(AArch64::SP, RegState::Kill)
1047
5
        .addReg(AArch64::X15, RegState::Kill)
1048
5
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
1049
5
        .setMIFlags(MachineInstr::FrameSetup);
1050
5
    if (NeedsWinCFI) {
1051
5
      HasWinCFI = true;
1052
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1053
5
          .addImm(NumBytes)
1054
5
          .setMIFlag(MachineInstr::FrameSetup);
1055
5
    }
1056
5
    NumBytes = 0;
1057
5
  }
1058
191k
1059
191k
  // Allocate space for the rest of the frame.
1060
191k
  if (NumBytes) {
1061
8.69k
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
1062
8.69k
    unsigned scratchSPReg = AArch64::SP;
1063
8.69k
1064
8.69k
    if (NeedsRealignment) {
1065
51
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
1066
51
      assert(scratchSPReg != AArch64::NoRegister);
1067
51
    }
1068
8.69k
1069
8.69k
    // If we're a leaf function, try using the red zone.
1070
8.69k
    if (!canUseRedZone(MF))
1071
8.69k
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1072
8.69k
      // the correct value here, as NumBytes also includes padding bytes,
1073
8.69k
      // which shouldn't be counted here.
1074
8.69k
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
1075
8.69k
                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1076
8.69k
1077
8.69k
    if (NeedsRealignment) {
1078
51
      const unsigned Alignment = MFI.getMaxAlignment();
1079
51
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
1080
51
      assert(NrBitsToZero > 1);
1081
51
      assert(scratchSPReg != AArch64::SP);
1082
51
1083
51
      // SUB X9, SP, NumBytes
1084
51
      //   -- X9 is temporary register, so shouldn't contain any live data here,
1085
51
      //   -- free to use. This is already produced by emitFrameOffset above.
1086
51
      // AND SP, X9, 0b11111...0000
1087
51
      // The logical immediates have a non-trivial encoding. The following
1088
51
      // formula computes the encoded immediate with all ones but
1089
51
      // NrBitsToZero zero bits as least significant bits.
1090
51
      uint32_t andMaskEncoded = (1 << 12)                         // = N
1091
51
                                | ((64 - NrBitsToZero) << 6)      // immr
1092
51
                                | ((64 - NrBitsToZero - 1) << 0); // imms
1093
51
1094
51
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1095
51
          .addReg(scratchSPReg, RegState::Kill)
1096
51
          .addImm(andMaskEncoded);
1097
51
      AFI->setStackRealigned(true);
1098
51
      if (NeedsWinCFI) {
1099
3
        HasWinCFI = true;
1100
3
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1101
3
            .addImm(NumBytes & andMaskEncoded)
1102
3
            .setMIFlag(MachineInstr::FrameSetup);
1103
3
      }
1104
51
    }
1105
8.69k
  }
1106
191k
1107
191k
  // If we need a base pointer, set it up here. It's whatever the value of the
1108
191k
  // stack pointer is at this point. Any variable size objects will be allocated
1109
191k
  // after this, so we can still use the base pointer to reference locals.
1110
191k
  //
1111
191k
  // FIXME: Clarify FrameSetup flags here.
1112
191k
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
1113
191k
  // needed.
1114
191k
  if (RegInfo->hasBasePointer(MF)) {
1115
42
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
1116
42
                     false);
1117
42
    if (NeedsWinCFI) {
1118
5
      HasWinCFI = true;
1119
5
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1120
5
          .setMIFlag(MachineInstr::FrameSetup);
1121
5
    }
1122
42
  }
1123
191k
1124
191k
  // The very last FrameSetup instruction indicates the end of prologue. Emit a
1125
191k
  // SEH opcode indicating the prologue end.
1126
191k
  if (NeedsWinCFI && 
HasWinCFI74
) {
1127
74
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1128
74
        .setMIFlag(MachineInstr::FrameSetup);
1129
74
  }
1130
191k
1131
191k
  if (needsFrameMoves) {
1132
31.9k
    const DataLayout &TD = MF.getDataLayout();
1133
31.9k
    const int StackGrowth = -TD.getPointerSize(0);
1134
31.9k
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
1135
31.9k
    // An example of the prologue:
1136
31.9k
    //
1137
31.9k
    //     .globl __foo
1138
31.9k
    //     .align 2
1139
31.9k
    //  __foo:
1140
31.9k
    // Ltmp0:
1141
31.9k
    //     .cfi_startproc
1142
31.9k
    //     .cfi_personality 155, ___gxx_personality_v0
1143
31.9k
    // Leh_func_begin:
1144
31.9k
    //     .cfi_lsda 16, Lexception33
1145
31.9k
    //
1146
31.9k
    //     stp  xa,bx, [sp, -#offset]!
1147
31.9k
    //     ...
1148
31.9k
    //     stp  x28, x27, [sp, #offset-32]
1149
31.9k
    //     stp  fp, lr, [sp, #offset-16]
1150
31.9k
    //     add  fp, sp, #offset - 16
1151
31.9k
    //     sub  sp, sp, #1360
1152
31.9k
    //
1153
31.9k
    // The Stack:
1154
31.9k
    //       +-------------------------------------------+
1155
31.9k
    // 10000 | ........ | ........ | ........ | ........ |
1156
31.9k
    // 10004 | ........ | ........ | ........ | ........ |
1157
31.9k
    //       +-------------------------------------------+
1158
31.9k
    // 10008 | ........ | ........ | ........ | ........ |
1159
31.9k
    // 1000c | ........ | ........ | ........ | ........ |
1160
31.9k
    //       +===========================================+
1161
31.9k
    // 10010 |                X28 Register               |
1162
31.9k
    // 10014 |                X28 Register               |
1163
31.9k
    //       +-------------------------------------------+
1164
31.9k
    // 10018 |                X27 Register               |
1165
31.9k
    // 1001c |                X27 Register               |
1166
31.9k
    //       +===========================================+
1167
31.9k
    // 10020 |                Frame Pointer              |
1168
31.9k
    // 10024 |                Frame Pointer              |
1169
31.9k
    //       +-------------------------------------------+
1170
31.9k
    // 10028 |                Link Register              |
1171
31.9k
    // 1002c |                Link Register              |
1172
31.9k
    //       +===========================================+
1173
31.9k
    // 10030 | ........ | ........ | ........ | ........ |
1174
31.9k
    // 10034 | ........ | ........ | ........ | ........ |
1175
31.9k
    //       +-------------------------------------------+
1176
31.9k
    // 10038 | ........ | ........ | ........ | ........ |
1177
31.9k
    // 1003c | ........ | ........ | ........ | ........ |
1178
31.9k
    //       +-------------------------------------------+
1179
31.9k
    //
1180
31.9k
    //     [sp] = 10030        ::    >>initial value<<
1181
31.9k
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
1182
31.9k
    //     fp = sp == 10020    ::  mov fp, sp
1183
31.9k
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
1184
31.9k
    //     sp == 10010         ::    >>final value<<
1185
31.9k
    //
1186
31.9k
    // The frame pointer (w29) points to address 10020. If we use an offset of
1187
31.9k
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
1188
31.9k
    // for w27, and -32 for w28:
1189
31.9k
    //
1190
31.9k
    //  Ltmp1:
1191
31.9k
    //     .cfi_def_cfa w29, 16
1192
31.9k
    //  Ltmp2:
1193
31.9k
    //     .cfi_offset w30, -8
1194
31.9k
    //  Ltmp3:
1195
31.9k
    //     .cfi_offset w29, -16
1196
31.9k
    //  Ltmp4:
1197
31.9k
    //     .cfi_offset w27, -24
1198
31.9k
    //  Ltmp5:
1199
31.9k
    //     .cfi_offset w28, -32
1200
31.9k
1201
31.9k
    if (HasFP) {
1202
30.4k
      // Define the current CFA rule to use the provided FP.
1203
30.4k
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
1204
30.4k
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
1205
30.4k
          nullptr, Reg, 2 * StackGrowth - FixedObject));
1206
30.4k
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1207
30.4k
          .addCFIIndex(CFIIndex)
1208
30.4k
          .setMIFlags(MachineInstr::FrameSetup);
1209
30.4k
    } else {
1210
1.45k
      // Encode the stack size of the leaf function.
1211
1.45k
      unsigned CFIIndex = MF.addFrameInst(
1212
1.45k
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
1213
1.45k
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1214
1.45k
          .addCFIIndex(CFIIndex)
1215
1.45k
          .setMIFlags(MachineInstr::FrameSetup);
1216
1.45k
    }
1217
31.9k
1218
31.9k
    // Now emit the moves for whatever callee saved regs we have (including FP,
1219
31.9k
    // LR if those are saved).
1220
31.9k
    emitCalleeSavedFrameMoves(MBB, MBBI);
1221
31.9k
  }
1222
191k
}
1223
1224
static void InsertReturnAddressAuth(MachineFunction &MF,
1225
365k
                                    MachineBasicBlock &MBB) {
1226
365k
  if (!ShouldSignReturnAddress(MF))
1227
365k
    return;
1228
15
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1229
15
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1230
15
1231
15
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1232
15
  DebugLoc DL;
1233
15
  if (MBBI != MBB.end())
1234
15
    DL = MBBI->getDebugLoc();
1235
15
1236
15
  // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1237
15
  // this instruction can safely used for any v8a architecture.
1238
15
  // From v8.3a onwards there are optimised authenticate LR and return
1239
15
  // instructions, namely RETA{A,B}, that can be used instead.
1240
15
  if (Subtarget.hasV8_3aOps() && 
MBBI != MBB.end()2
&&
1241
15
      
MBBI->getOpcode() == AArch64::RET_ReallyLR2
) {
1242
2
    BuildMI(MBB, MBBI, DL,
1243
2
            TII->get(ShouldSignWithAKey(MF) ? 
AArch64::RETAA1
:
AArch64::RETAB1
))
1244
2
        .copyImplicitOps(*MBBI);
1245
2
    MBB.erase(MBBI);
1246
13
  } else {
1247
13
    BuildMI(
1248
13
        MBB, MBBI, DL,
1249
13
        TII->get(ShouldSignWithAKey(MF) ? 
AArch64::AUTIASP9
:
AArch64::AUTIBSP4
))
1250
13
        .setMIFlag(MachineInstr::FrameDestroy);
1251
13
  }
1252
15
}
1253
1254
365k
static bool isFuncletReturnInstr(const MachineInstr &MI) {
1255
365k
  switch (MI.getOpcode()) {
1256
365k
  default:
1257
365k
    return false;
1258
365k
  case AArch64::CATCHRET:
1259
12
  case AArch64::CLEANUPRET:
1260
12
    return true;
1261
365k
  }
1262
365k
}
1263
1264
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
1265
365k
                                        MachineBasicBlock &MBB) const {
1266
365k
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1267
365k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1268
365k
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1269
365k
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1270
365k
  DebugLoc DL;
1271
365k
  bool IsTailCallReturn = false;
1272
365k
  bool NeedsWinCFI = needsWinCFI(MF);
1273
365k
  bool HasWinCFI = false;
1274
365k
  bool IsFunclet = false;
1275
365k
  auto WinCFI = make_scope_exit([&]() {
1276
365k
    if (!MF.hasWinCFI())
1277
365k
      MF.setHasWinCFI(HasWinCFI);
1278
365k
  });
1279
365k
1280
365k
  if (MBB.end() != MBBI) {
1281
365k
    DL = MBBI->getDebugLoc();
1282
365k
    unsigned RetOpcode = MBBI->getOpcode();
1283
365k
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
1284
365k
                       
RetOpcode == AArch64::TCRETURNri313k
||
1285
365k
                       
RetOpcode == AArch64::TCRETURNriBTI313k
;
1286
365k
    IsFunclet = isFuncletReturnInstr(*MBBI);
1287
365k
  }
1288
365k
1289
365k
  int NumBytes = IsFunclet ? 
(int)getWinEHFuncletFrameSize(MF)12
1290
365k
                           : 
MFI.getStackSize()365k
;
1291
365k
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1292
365k
1293
365k
  // All calls are tail calls in GHC calling conv, and functions have no
1294
365k
  // prologue/epilogue.
1295
365k
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1296
6
    return;
1297
365k
1298
365k
  // Initial and residual are named for consistency with the prologue. Note that
1299
365k
  // in the epilogue, the residual adjustment is executed first.
1300
365k
  uint64_t ArgumentPopSize = 0;
1301
365k
  if (IsTailCallReturn) {
1302
52.8k
    MachineOperand &StackAdjust = MBBI->getOperand(1);
1303
52.8k
1304
52.8k
    // For a tail-call in a callee-pops-arguments environment, some or all of
1305
52.8k
    // the stack may actually be in use for the call's arguments, this is
1306
52.8k
    // calculated during LowerCall and consumed here...
1307
52.8k
    ArgumentPopSize = StackAdjust.getImm();
1308
313k
  } else {
1309
313k
    // ... otherwise the amount to pop is *all* of the argument space,
1310
313k
    // conveniently stored in the MachineFunctionInfo by
1311
313k
    // LowerFormalArguments. This will, of course, be zero for the C calling
1312
313k
    // convention.
1313
313k
    ArgumentPopSize = AFI->getArgumentStackToRestore();
1314
313k
  }
1315
365k
1316
365k
  // The stack frame should be like below,
1317
365k
  //
1318
365k
  //      ----------------------                     ---
1319
365k
  //      |                    |                      |
1320
365k
  //      | BytesInStackArgArea|              CalleeArgStackSize
1321
365k
  //      | (NumReusableBytes) |                (of tail call)
1322
365k
  //      |                    |                     ---
1323
365k
  //      |                    |                      |
1324
365k
  //      ---------------------|        ---           |
1325
365k
  //      |                    |         |            |
1326
365k
  //      |   CalleeSavedReg   |         |            |
1327
365k
  //      | (CalleeSavedStackSize)|      |            |
1328
365k
  //      |                    |         |            |
1329
365k
  //      ---------------------|         |         NumBytes
1330
365k
  //      |                    |     StackSize  (StackAdjustUp)
1331
365k
  //      |   LocalStackSize   |         |            |
1332
365k
  //      | (covering callee   |         |            |
1333
365k
  //      |       args)        |         |            |
1334
365k
  //      |                    |         |            |
1335
365k
  //      ----------------------        ---          ---
1336
365k
  //
1337
365k
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1338
365k
  //             = StackSize + ArgumentPopSize
1339
365k
  //
1340
365k
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1341
365k
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
1342
365k
1343
365k
  auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1344
365k
1345
365k
  bool IsWin64 =
1346
365k
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1347
365k
  // Var args are accounted for in the containing function, so don't
1348
365k
  // include them for funclets.
1349
365k
  unsigned FixedObject =
1350
365k
      (IsWin64 && 
!IsFunclet172
) ?
alignTo(AFI->getVarArgsGPRSize(), 16)160
:
0365k
;
1351
365k
1352
365k
  uint64_t AfterCSRPopSize = ArgumentPopSize;
1353
365k
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1354
365k
  // We cannot rely on the local stack size set in emitPrologue if the function
1355
365k
  // has funclets, as funclets have different local stack size requirements, and
1356
365k
  // the current value set in emitPrologue may be that of the containing
1357
365k
  // function.
1358
365k
  if (MF.hasEHFunclets())
1359
27
    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1360
365k
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1361
365k
  // Assume we can't combine the last pop with the sp restore.
1362
365k
1363
365k
  if (!CombineSPBump && 
PrologueSaveSize != 0296k
) {
1364
188k
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1365
188k
    while (AArch64InstrInfo::isSEHInstruction(*Pop))
1366
57
      Pop = std::prev(Pop);
1367
188k
    // Converting the last ldp to a post-index ldp is valid only if the last
1368
188k
    // ldp's offset is 0.
1369
188k
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1370
188k
    // If the offset is 0, convert it to a post-index ldp.
1371
188k
    if (OffsetOp.getImm() == 0)
1372
188k
      convertCalleeSaveRestoreToSPPrePostIncDec(
1373
188k
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1374
7
    else {
1375
7
      // If not, make sure to emit an add after the last ldp.
1376
7
      // We're doing this by transfering the size to be restored from the
1377
7
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
1378
7
      // pops.
1379
7
      AfterCSRPopSize += PrologueSaveSize;
1380
7
    }
1381
188k
  }
1382
365k
1383
365k
  // Move past the restores of the callee-saved registers.
1384
365k
  // If we plan on combining the sp bump of the local stack size and the callee
1385
365k
  // save stack size, we might need to adjust the CSR save and restore offsets.
1386
365k
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
1387
365k
  MachineBasicBlock::iterator Begin = MBB.begin();
1388
1.13M
  while (LastPopI != Begin) {
1389
1.09M
    --LastPopI;
1390
1.09M
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
1391
327k
      ++LastPopI;
1392
327k
      break;
1393
770k
    } else if (CombineSPBump)
1394
185k
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
1395
185k
                                        NeedsWinCFI, &HasWinCFI);
1396
1.09M
  }
1397
365k
1398
365k
  if (NeedsWinCFI) {
1399
159
    HasWinCFI = true;
1400
159
    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1401
159
        .setMIFlag(MachineInstr::FrameDestroy);
1402
159
  }
1403
365k
1404
365k
  // If there is a single SP update, insert it before the ret and we're done.
1405
365k
  if (CombineSPBump) {
1406
69.1k
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1407
69.1k
                    NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
1408
69.1k
                    false, NeedsWinCFI, &HasWinCFI);
1409
69.1k
    if (NeedsWinCFI && 
HasWinCFI36
)
1410
36
      BuildMI(MBB, MBB.getFirstTerminator(), DL,
1411
36
              TII->get(AArch64::SEH_EpilogEnd))
1412
36
          .setMIFlag(MachineInstr::FrameDestroy);
1413
69.1k
    return;
1414
69.1k
  }
1415
296k
1416
296k
  NumBytes -= PrologueSaveSize;
1417
296k
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
1418
296k
1419
296k
  if (!hasFP(MF)) {
1420
130k
    bool RedZone = canUseRedZone(MF);
1421
130k
    // If this was a redzone leaf function, we don't need to restore the
1422
130k
    // stack pointer (but we may need to pop stack args for fastcc).
1423
130k
    if (RedZone && 
AfterCSRPopSize == 056
)
1424
53
      return;
1425
130k
1426
130k
    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1427
130k
    int StackRestoreBytes = RedZone ? 
03
:
NumBytes130k
;
1428
130k
    if (NoCalleeSaveRestore)
1429
107k
      StackRestoreBytes += AfterCSRPopSize;
1430
130k
1431
130k
    // If we were able to combine the local stack pop with the argument pop,
1432
130k
    // then we're done.
1433
130k
    bool Done = NoCalleeSaveRestore || 
AfterCSRPopSize == 022.9k
;
1434
130k
1435
130k
    // If we're done after this, make sure to help the load store optimizer.
1436
130k
    if (Done)
1437
130k
      adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1438
130k
1439
130k
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1440
130k
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
1441
130k
                    NeedsWinCFI, &HasWinCFI);
1442
130k
    if (Done) {
1443
130k
      if (NeedsWinCFI) {
1444
94
        HasWinCFI = true;
1445
94
        BuildMI(MBB, MBB.getFirstTerminator(), DL,
1446
94
                TII->get(AArch64::SEH_EpilogEnd))
1447
94
            .setMIFlag(MachineInstr::FrameDestroy);
1448
94
      }
1449
130k
      return;
1450
130k
    }
1451
10
1452
10
    NumBytes = 0;
1453
10
  }
1454
296k
1455
296k
  // Restore the original stack pointer.
1456
296k
  // FIXME: Rather than doing the math here, we should instead just use
1457
296k
  // non-post-indexed loads for the restores if we aren't actually going to
1458
296k
  // be able to save any instructions.
1459
296k
  
if (165k
!IsFunclet165k
&&
(165k
MFI.hasVarSizedObjects()165k
||
AFI->isStackRealigned()165k
))
1460
139
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1461
139
                    -AFI->getCalleeSavedStackSize() + 16, TII,
1462
139
                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
1463
165k
  else if (NumBytes)
1464
8.66k
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
1465
8.66k
                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
1466
165k
1467
165k
  // This must be placed after the callee-save restore code because that code
1468
165k
  // assumes the SP is at the same location as it was after the callee-save save
1469
165k
  // code in the prologue.
1470
165k
  if (AfterCSRPopSize) {
1471
10
    // Find an insertion point for the first ldp so that it goes before the
1472
10
    // shadow call stack epilog instruction. This ensures that the restore of
1473
10
    // lr from x18 is placed after the restore from sp.
1474
10
    auto FirstSPPopI = MBB.getFirstTerminator();
1475
13
    while (FirstSPPopI != Begin) {
1476
13
      auto Prev = std::prev(FirstSPPopI);
1477
13
      if (Prev->getOpcode() != AArch64::LDRXpre ||
1478
13
          
Prev->getOperand(0).getReg() == AArch64::SP3
)
1479
10
        break;
1480
3
      FirstSPPopI = Prev;
1481
3
    }
1482
10
1483
10
    adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1484
10
1485
10
    emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1486
10
                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
1487
10
                    NeedsWinCFI, &HasWinCFI);
1488
10
  }
1489
165k
  if (NeedsWinCFI && 
HasWinCFI29
)
1490
29
    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1491
29
        .setMIFlag(MachineInstr::FrameDestroy);
1492
165k
1493
165k
  MF.setHasWinCFI(HasWinCFI);
1494
165k
}
1495
1496
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1497
/// debug info.  It's the same as what we use for resolving the code-gen
1498
/// references for now.  FIXME: This can go wrong when references are
1499
/// SP-relative and simple call frames aren't used.
1500
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1501
                                                 int FI,
1502
24
                                                 unsigned &FrameReg) const {
1503
24
  return resolveFrameIndexReference(
1504
24
      MF, FI, FrameReg,
1505
24
      /*PreferFP=*/
1506
24
      MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
1507
24
      /*ForSimm=*/false);
1508
24
}
1509
1510
int AArch64FrameLowering::getNonLocalFrameIndexReference(
1511
5
  const MachineFunction &MF, int FI) const {
1512
5
  return getSEHFrameIndexOffset(MF, FI);
1513
5
}
1514
1515
595k
static int getFPOffset(const MachineFunction &MF, int ObjectOffset) {
1516
595k
  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1517
595k
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1518
595k
  bool IsWin64 =
1519
595k
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1520
595k
  unsigned FixedObject = IsWin64 ? 
alignTo(AFI->getVarArgsGPRSize(), 16)360
:
0594k
;
1521
595k
  return ObjectOffset + FixedObject + 16;
1522
595k
}
1523
1524
595k
static int getStackOffset(const MachineFunction &MF, int ObjectOffset) {
1525
595k
  const auto &MFI = MF.getFrameInfo();
1526
595k
  return ObjectOffset + MFI.getStackSize();
1527
595k
}
1528
1529
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1530
5
                                                 int FI) const {
1531
5
  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1532
5
      MF.getSubtarget().getRegisterInfo());
1533
5
  int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1534
5
  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1535
5
             ? 
getFPOffset(MF, ObjectOffset)3
1536
5
             : 
getStackOffset(MF, ObjectOffset)2
;
1537
5
}
1538
1539
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
1540
                                                     int FI, unsigned &FrameReg,
1541
                                                     bool PreferFP,
1542
595k
                                                     bool ForSimm) const {
1543
595k
  const auto &MFI = MF.getFrameInfo();
1544
595k
  int ObjectOffset = MFI.getObjectOffset(FI);
1545
595k
  bool isFixed = MFI.isFixedObjectIndex(FI);
1546
595k
  return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
1547
595k
                                     PreferFP, ForSimm);
1548
595k
}
1549
1550
int AArch64FrameLowering::resolveFrameOffsetReference(
1551
    const MachineFunction &MF, int ObjectOffset, bool isFixed,
1552
595k
    unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
1553
595k
  const auto &MFI = MF.getFrameInfo();
1554
595k
  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1555
595k
      MF.getSubtarget().getRegisterInfo());
1556
595k
  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1557
595k
  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1558
595k
1559
595k
  int FPOffset = getFPOffset(MF, ObjectOffset);
1560
595k
  int Offset = getStackOffset(MF, ObjectOffset);
1561
595k
  bool isCSR =
1562
595k
      !isFixed && 
ObjectOffset >= -((int)AFI->getCalleeSavedStackSize())577k
;
1563
595k
1564
595k
  // Use frame pointer to reference fixed objects. Use it for locals if
1565
595k
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1566
595k
  // reliable as a base). Make sure useFPForScavengingIndex() does the
1567
595k
  // right thing for the emergency spill slot.
1568
595k
  bool UseFP = false;
1569
595k
  if (AFI->hasStackFrame()) {
1570
588k
    // Note: Keeping the following as multiple 'if' statements rather than
1571
588k
    // merging to a single expression for readability.
1572
588k
    //
1573
588k
    // Argument access should always use the FP.
1574
588k
    if (isFixed) {
1575
12.4k
      UseFP = hasFP(MF);
1576
575k
    } else if (isCSR && 
RegInfo->needsStackRealignment(MF)115
) {
1577
3
      // References to the CSR area must use FP if we're re-aligning the stack
1578
3
      // since the dynamically-sized alignment padding is between the SP/BP and
1579
3
      // the CSR area.
1580
3
      assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1581
3
      UseFP = true;
1582
575k
    } else if (hasFP(MF) && 
!RegInfo->needsStackRealignment(MF)518k
) {
1583
517k
      // If the FPOffset is negative and we're producing a signed immediate, we
1584
517k
      // have to keep in mind that the available offset range for negative
1585
517k
      // offsets is smaller than for positive ones. If an offset is available
1586
517k
      // via the FP and the SP, use whichever is closest.
1587
517k
      bool FPOffsetFits = !ForSimm || 
FPOffset >= -256517k
;
1588
517k
      PreferFP |= Offset > -FPOffset;
1589
517k
1590
517k
      if (MFI.hasVarSizedObjects()) {
1591
1.09k
        // If we have variable sized objects, we can use either FP or BP, as the
1592
1.09k
        // SP offset is unknown. We can use the base pointer if we have one and
1593
1.09k
        // FP is not preferred. If not, we're stuck with using FP.
1594
1.09k
        bool CanUseBP = RegInfo->hasBasePointer(MF);
1595
1.09k
        if (FPOffsetFits && 
CanUseBP566
) // Both are ok. Pick the best.
1596
15
          UseFP = PreferFP;
1597
1.08k
        else if (!CanUseBP) // Can't use BP. Forced to use FP.
1598
660
          UseFP = true;
1599
1.09k
        // else we can use BP and FP, but the offset from FP won't fit.
1600
1.09k
        // That will make us scavenge registers which we can probably avoid by
1601
1.09k
        // using BP. If it won't fit for BP either, we'll scavenge anyway.
1602
516k
      } else if (FPOffset >= 0) {
1603
0
        // Use SP or FP, whichever gives us the best chance of the offset
1604
0
        // being in range for direct access. If the FPOffset is positive,
1605
0
        // that'll always be best, as the SP will be even further away.
1606
0
        UseFP = true;
1607
516k
      } else if (MF.hasEHFunclets() && 
!RegInfo->hasBasePointer(MF)56
) {
1608
30
        // Funclets access the locals contained in the parent's stack frame
1609
30
        // via the frame pointer, so we have to use the FP in the parent
1610
30
        // function.
1611
30
        (void) Subtarget;
1612
30
        assert(
1613
30
            Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
1614
30
            "Funclets should only be present on Win64");
1615
30
        UseFP = true;
1616
516k
      } else {
1617
516k
        // We have the choice between FP and (SP or BP).
1618
516k
        if (FPOffsetFits && 
PreferFP239k
) // If FP is the best fit, use it.
1619
47.2k
          UseFP = true;
1620
516k
      }
1621
517k
    }
1622
588k
  }
1623
595k
1624
595k
  assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
1625
595k
         "In the presence of dynamic stack pointer realignment, "
1626
595k
         "non-argument/CSR objects cannot be accessed through the frame pointer");
1627
595k
1628
595k
  if (UseFP) {
1629
48.9k
    FrameReg = RegInfo->getFrameRegister(MF);
1630
48.9k
    return FPOffset;
1631
48.9k
  }
1632
546k
1633
546k
  // Use the base pointer if we have one.
1634
546k
  if (RegInfo->hasBasePointer(MF))
1635
1.10k
    FrameReg = RegInfo->getBaseRegister();
1636
545k
  else {
1637
545k
    assert(!MFI.hasVarSizedObjects() &&
1638
545k
           "Can't use SP when we have var sized objects.");
1639
545k
    FrameReg = AArch64::SP;
1640
545k
    // If we're using the red zone for this function, the SP won't actually
1641
545k
    // be adjusted, so the offsets will be negative. They're also all
1642
545k
    // within range of the signed 9-bit immediate instructions.
1643
545k
    if (canUseRedZone(MF))
1644
23
      Offset -= AFI->getLocalStackSize();
1645
545k
  }
1646
546k
1647
546k
  return Offset;
1648
546k
}
1649
1650
1.24M
// Return the kill-flag register state to use when \p Reg is stored in the
// prologue: kill it unless it is live-in to the function.
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed in
  // callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}
1659
1660
1.30M
// Returns true if this function must emit a MachO compact-unwind-compatible
// frame. Swift-error functions are excluded because they clobber a register
// the compact unwind format cannot describe.
static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeList Attrs = MF.getFunction().getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
1667
1668
// Returns true if (Reg1, Reg2) may NOT be saved as a pair. Only Windows CFI
// restricts pairing; everywhere else any pair is fine.
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI) {
  // If we are generating register pairs for a Windows function that requires
  // EH support, then pair consecutive registers only.  There are no unwind
  // opcodes for saves/restores of non-consecutive register pairs.
  // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
  // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling

  // TODO: LR can be paired with any register.  We don't support this yet in
  // the MCLayer.  We need to add support for the save_lrpair unwind code.
  if (!NeedsWinCFI)
    return false;
  if (Reg2 == Reg1 + 1)
    return false;
  return true;
}
1684
1685
namespace {
1686
1687
struct RegPairInfo {
1688
  unsigned Reg1 = AArch64::NoRegister;
1689
  unsigned Reg2 = AArch64::NoRegister;
1690
  int FrameIdx;
1691
  int Offset;
1692
  enum RegType { GPR, FPR64, FPR128 } Type;
1693
1694
1.39M
  RegPairInfo() = default;
1695
1696
5.57M
  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
1697
};
1698
1699
} // end anonymous namespace
1700
1701
static void computeCalleeSaveRegisterPairs(
1702
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
1703
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
1704
449k
    bool &NeedShadowCallStackProlog) {
1705
449k
1706
449k
  if (CSI.empty())
1707
0
    return;
1708
449k
1709
449k
  bool NeedsWinCFI = needsWinCFI(MF);
1710
449k
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1711
449k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1712
449k
  CallingConv::ID CC = MF.getFunction().getCallingConv();
1713
449k
  unsigned Count = CSI.size();
1714
449k
  (void)CC;
1715
449k
  // MachO's compact unwind format relies on all registers being stored in
1716
449k
  // pairs.
1717
449k
  assert((!produceCompactUnwindFrame(MF) ||
1718
449k
          CC == CallingConv::PreserveMost ||
1719
449k
          (Count & 1) == 0) &&
1720
449k
         "Odd number of callee-saved regs to spill!");
1721
449k
  int Offset = AFI->getCalleeSavedStackSize();
1722
449k
  // On Linux, we will have either one or zero non-paired register.  On Windows
1723
449k
  // with CFI, we can have multiple unpaired registers in order to utilize the
1724
449k
  // available unwind codes.  This flag assures that the alignment fixup is done
1725
449k
  // only once, as intened.
1726
449k
  bool FixupDone = false;
1727
1.84M
  for (unsigned i = 0; i < Count; 
++i1.39M
) {
1728
1.39M
    RegPairInfo RPI;
1729
1.39M
    RPI.Reg1 = CSI[i].getReg();
1730
1.39M
1731
1.39M
    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
1732
1.36M
      RPI.Type = RegPairInfo::GPR;
1733
29.3k
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
1734
29.3k
      RPI.Type = RegPairInfo::FPR64;
1735
19
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
1736
19
      RPI.Type = RegPairInfo::FPR128;
1737
19
    else
1738
19
      
llvm_unreachable0
("Unsupported register class.");
1739
1.39M
1740
1.39M
    // Add the next reg to the pair if it is in the same register class.
1741
1.39M
    if (i + 1 < Count) {
1742
1.39M
      unsigned NextReg = CSI[i + 1].getReg();
1743
1.39M
      switch (RPI.Type) {
1744
1.39M
      case RegPairInfo::GPR:
1745
1.36M
        if (AArch64::GPR64RegClass.contains(NextReg) &&
1746
1.36M
            
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)1.36M
)
1747
1.36M
          RPI.Reg2 = NextReg;
1748
1.36M
        break;
1749
1.39M
      case RegPairInfo::FPR64:
1750
29.0k
        if (AArch64::FPR64RegClass.contains(NextReg) &&
1751
29.0k
            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1752
29.0k
          RPI.Reg2 = NextReg;
1753
29.0k
        break;
1754
1.39M
      case RegPairInfo::FPR128:
1755
19
        if (AArch64::FPR128RegClass.contains(NextReg))
1756
19
          RPI.Reg2 = NextReg;
1757
19
        break;
1758
1.39M
      }
1759
1.39M
    }
1760
1.39M
1761
1.39M
    // If either of the registers to be saved is the lr register, it means that
1762
1.39M
    // we also need to save lr in the shadow call stack.
1763
1.39M
    if ((RPI.Reg1 == AArch64::LR || 
RPI.Reg2 == AArch64::LR954k
) &&
1764
1.39M
        
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)439k
) {
1765
6
      if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
1766
0
        report_fatal_error("Must reserve x18 to use shadow call stack");
1767
6
      NeedShadowCallStackProlog = true;
1768
6
    }
1769
1.39M
1770
1.39M
    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1771
1.39M
    // list to come in sorted by frame index so that we can issue the store
1772
1.39M
    // pair instructions directly. Assert if we see anything otherwise.
1773
1.39M
    //
1774
1.39M
    // The order of the registers in the list is controlled by
1775
1.39M
    // getCalleeSavedRegs(), so they will always be in-order, as well.
1776
1.39M
    assert((!RPI.isPaired() ||
1777
1.39M
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
1778
1.39M
           "Out of order callee saved regs!");
1779
1.39M
1780
1.39M
    // MachO's compact unwind format relies on all registers being stored in
1781
1.39M
    // adjacent register pairs.
1782
1.39M
    assert((!produceCompactUnwindFrame(MF) ||
1783
1.39M
            CC == CallingConv::PreserveMost ||
1784
1.39M
            (RPI.isPaired() &&
1785
1.39M
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1786
1.39M
              RPI.Reg1 + 1 == RPI.Reg2))) &&
1787
1.39M
           "Callee-save registers not saved as adjacent register pair!");
1788
1.39M
1789
1.39M
    RPI.FrameIdx = CSI[i].getFrameIdx();
1790
1.39M
1791
1.39M
    int Scale = RPI.Type == RegPairInfo::FPR128 ? 
1619
:
81.39M
;
1792
1.39M
    Offset -= RPI.isPaired() ? 
2 * Scale1.39M
:
Scale2.24k
;
1793
1.39M
1794
1.39M
    // Round up size of non-pair to pair size if we need to pad the
1795
1.39M
    // callee-save area to ensure 16-byte alignment.
1796
1.39M
    if (AFI->hasCalleeSaveStackFreeSpace() && 
!FixupDone2.37k
&&
1797
1.39M
        
RPI.Type != RegPairInfo::FPR1282.25k
&&
!RPI.isPaired()2.25k
) {
1798
1.57k
      FixupDone = true;
1799
1.57k
      Offset -= 8;
1800
1.57k
      assert(Offset % 16 == 0);
1801
1.57k
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
1802
1.57k
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
1803
1.57k
    }
1804
1.39M
1805
1.39M
    assert(Offset % Scale == 0);
1806
1.39M
    RPI.Offset = Offset / Scale;
1807
1.39M
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1808
1.39M
           "Offset out of bounds for LDP/STP immediate");
1809
1.39M
1810
1.39M
    RegPairs.push_back(RPI);
1811
1.39M
    if (RPI.isPaired())
1812
1.39M
      ++i;
1813
1.39M
  }
1814
449k
}
1815
1816
bool AArch64FrameLowering::spillCalleeSavedRegisters(
1817
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1818
    const std::vector<CalleeSavedInfo> &CSI,
1819
191k
    const TargetRegisterInfo *TRI) const {
1820
191k
  MachineFunction &MF = *MBB.getParent();
1821
191k
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1822
191k
  bool NeedsWinCFI = needsWinCFI(MF);
1823
191k
  DebugLoc DL;
1824
191k
  SmallVector<RegPairInfo, 8> RegPairs;
1825
191k
1826
191k
  bool NeedShadowCallStackProlog = false;
1827
191k
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1828
191k
                                 NeedShadowCallStackProlog);
1829
191k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
1830
191k
1831
191k
  if (NeedShadowCallStackProlog) {
1832
3
    // Shadow call stack prolog: str x30, [x18], #8
1833
3
    BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1834
3
        .addReg(AArch64::X18, RegState::Define)
1835
3
        .addReg(AArch64::LR)
1836
3
        .addReg(AArch64::X18)
1837
3
        .addImm(8)
1838
3
        .setMIFlag(MachineInstr::FrameSetup);
1839
3
1840
3
    if (NeedsWinCFI)
1841
0
      BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1842
0
          .setMIFlag(MachineInstr::FrameSetup);
1843
3
1844
3
    if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1845
2
      // Emit a CFI instruction that causes 8 to be subtracted from the value of
1846
2
      // x18 when unwinding past this frame.
1847
2
      static const char CFIInst[] = {
1848
2
          dwarf::DW_CFA_val_expression,
1849
2
          18, // register
1850
2
          2,  // length
1851
2
          static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1852
2
          static_cast<char>(-8) & 0x7f, // addend (sleb128)
1853
2
      };
1854
2
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1855
2
          nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1856
2
      BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1857
2
          .addCFIIndex(CFIIndex)
1858
2
          .setMIFlag(MachineInstr::FrameSetup);
1859
2
    }
1860
3
1861
3
    // This instruction also makes x18 live-in to the entry block.
1862
3
    MBB.addLiveIn(AArch64::X18);
1863
3
  }
1864
191k
1865
815k
  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1866
623k
       ++RPII) {
1867
623k
    RegPairInfo RPI = *RPII;
1868
623k
    unsigned Reg1 = RPI.Reg1;
1869
623k
    unsigned Reg2 = RPI.Reg2;
1870
623k
    unsigned StrOpc;
1871
623k
1872
623k
    // Issue sequence of spills for cs regs.  The first spill may be converted
1873
623k
    // to a pre-decrement store later by emitPrologue if the callee-save stack
1874
623k
    // area allocation can't be combined with the local stack area allocation.
1875
623k
    // For example:
1876
623k
    //    stp     x22, x21, [sp, #0]     // addImm(+0)
1877
623k
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
1878
623k
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
1879
623k
    // Rationale: This sequence saves uop updates compared to a sequence of
1880
623k
    // pre-increment spills like stp xi,xj,[sp,#-16]!
1881
623k
    // Note: Similar rationale and sequence for restores in epilog.
1882
623k
    unsigned Size, Align;
1883
623k
    switch (RPI.Type) {
1884
623k
    case RegPairInfo::GPR:
1885
611k
       StrOpc = RPI.isPaired() ? 
AArch64::STPXi610k
:
AArch64::STRXui949
;
1886
611k
       Size = 8;
1887
611k
       Align = 8;
1888
611k
       break;
1889
623k
    case RegPairInfo::FPR64:
1890
12.3k
       StrOpc = RPI.isPaired() ? 
AArch64::STPDi12.1k
:
AArch64::STRDui163
;
1891
12.3k
       Size = 8;
1892
12.3k
       Align = 8;
1893
12.3k
       break;
1894
623k
    case RegPairInfo::FPR128:
1895
19
       StrOpc = RPI.isPaired() ? AArch64::STPQi : 
AArch64::STRQui0
;
1896
19
       Size = 16;
1897
19
       Align = 16;
1898
19
       break;
1899
623k
    }
1900
623k
    LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1901
623k
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1902
623k
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
1903
623k
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1904
623k
               dbgs() << ")\n");
1905
623k
1906
623k
    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
1907
623k
           "Windows unwdinding requires a consecutive (FP,LR) pair");
1908
623k
    // Windows unwind codes require consecutive registers if registers are
1909
623k
    // paired.  Make the switch here, so that the code below will save (x,x+1)
1910
623k
    // and not (x+1,x).
1911
623k
    unsigned FrameIdxReg1 = RPI.FrameIdx;
1912
623k
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
1913
623k
    if (NeedsWinCFI && 
RPI.isPaired()149
) {
1914
77
      std::swap(Reg1, Reg2);
1915
77
      std::swap(FrameIdxReg1, FrameIdxReg2);
1916
77
    }
1917
623k
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
1918
623k
    if (!MRI.isReserved(Reg1))
1919
623k
      MBB.addLiveIn(Reg1);
1920
623k
    if (RPI.isPaired()) {
1921
622k
      if (!MRI.isReserved(Reg2))
1922
432k
        MBB.addLiveIn(Reg2);
1923
622k
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1924
622k
      MIB.addMemOperand(MF.getMachineMemOperand(
1925
622k
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
1926
622k
          MachineMemOperand::MOStore, Size, Align));
1927
622k
    }
1928
623k
    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1929
623k
        .addReg(AArch64::SP)
1930
623k
        .addImm(RPI.Offset) // [sp, #offset*scale],
1931
623k
                            // where factor*scale is implicit
1932
623k
        .setMIFlag(MachineInstr::FrameSetup);
1933
623k
    MIB.addMemOperand(MF.getMachineMemOperand(
1934
623k
        MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
1935
623k
        MachineMemOperand::MOStore, Size, Align));
1936
623k
    if (NeedsWinCFI)
1937
149
      InsertSEH(MIB, TII, MachineInstr::FrameSetup);
1938
623k
1939
623k
  }
1940
191k
  return true;
1941
191k
}
1942
1943
// Emit the callee-save reload sequence in front of MI (the epilogue insertion
// point) for every register recorded in CSI.  Registers are reloaded in the
// pairs computed by computeCalleeSaveRegisterPairs(); returns true so generic
// frame lowering knows the target emitted the restores itself.
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;
  bool NeedsWinCFI = needsWinCFI(MF);

  // Borrow the debug location of the instruction we insert before, if any.
  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  bool NeedShadowCallStackProlog = false;
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
                                 NeedShadowCallStackProlog);

  // Emit one load (LDR) or load-pair (LDP) for a single RegPairInfo.
  auto EmitMI = [&](const RegPairInfo &RPI) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be converted
    // to a post-increment load later by emitEpilogue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    //    ldp     x22, x21, [sp, #0]      // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    unsigned Size, Align;
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Size = 8;
      Align = 8;
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Size = 8;
      Align = 8;
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Size = 16;
      Align = 16;
      break;
    }
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    // Windows unwind codes require consecutive registers if registers are
    // paired.  Make the switch here, so that the code below will save (x,x+1)
    // and not (x+1,x).
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(Reg1, Reg2);
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    // For a pair, Reg2 is the first operand of the LDP; both destinations are
    // defs, and each gets a fixed-stack memory operand for its own slot.
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
          MachineMemOperand::MOLoad, Size, Align));
    }
    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*scale]
                            // where factor*scale is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
        MachineMemOperand::MOLoad, Size, Align));
    if (NeedsWinCFI)
      InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
  };

  // ReverseCSRRestoreSeq is presumably a cl::opt defined earlier in this
  // file; when set, restores are emitted in the opposite order.
  if (ReverseCSRRestoreSeq)
    for (const RegPairInfo &RPI : reverse(RegPairs))
      EmitMI(RPI);
  else
    for (const RegPairInfo &RPI : RegPairs)
      EmitMI(RPI);

  if (NeedShadowCallStackProlog) {
    // Shadow call stack epilog: ldr x30, [x18, #-8]!
    // Pre-decrement reload of LR from the shadow stack; x18 is updated too,
    // hence the extra Define of X18.
    BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
        .addReg(AArch64::X18, RegState::Define)
        .addReg(AArch64::LR, RegState::Define)
        .addReg(AArch64::X18)
        .addImm(-8)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  return true;
}
2043
2044
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
2045
                                                BitVector &SavedRegs,
2046
279k
                                                RegScavenger *RS) const {
2047
279k
  // All calls are tail calls in GHC calling conv, and functions have no
2048
279k
  // prologue/epilogue.
2049
279k
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
2050
6
    return;
2051
279k
2052
279k
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2053
279k
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2054
279k
      MF.getSubtarget().getRegisterInfo());
2055
279k
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2056
279k
  unsigned UnspilledCSGPR = AArch64::NoRegister;
2057
279k
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2058
279k
2059
279k
  MachineFrameInfo &MFI = MF.getFrameInfo();
2060
279k
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2061
279k
2062
279k
  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2063
279k
                                ? 
RegInfo->getBaseRegister()42
2064
279k
                                : 
(unsigned)AArch64::NoRegister279k
;
2065
279k
2066
279k
  unsigned ExtraCSSpill = 0;
2067
279k
  // Figure out which callee-saved registers to save/restore.
2068
5.87M
  for (unsigned i = 0; CSRegs[i]; 
++i5.59M
) {
2069
5.59M
    const unsigned Reg = CSRegs[i];
2070
5.59M
2071
5.59M
    // Add the base pointer register to SavedRegs if it is callee-save.
2072
5.59M
    if (Reg == BasePointerReg)
2073
42
      SavedRegs.set(Reg);
2074
5.59M
2075
5.59M
    bool RegUsed = SavedRegs.test(Reg);
2076
5.59M
    unsigned PairedReg = CSRegs[i ^ 1];
2077
5.59M
    if (!RegUsed) {
2078
4.29M
      if (AArch64::GPR64RegClass.contains(Reg) &&
2079
4.29M
          
!RegInfo->isReservedReg(MF, Reg)2.08M
) {
2080
2.01M
        UnspilledCSGPR = Reg;
2081
2.01M
        UnspilledCSGPRPaired = PairedReg;
2082
2.01M
      }
2083
4.29M
      continue;
2084
4.29M
    }
2085
1.30M
2086
1.30M
    // MachO's compact unwind format relies on all registers being stored in
2087
1.30M
    // pairs.
2088
1.30M
    // FIXME: the usual format is actually better if unwinding isn't needed.
2089
1.30M
    if (produceCompactUnwindFrame(MF) && 
PairedReg != AArch64::NoRegister1.30M
&&
2090
1.30M
        
!SavedRegs.test(PairedReg)1.30M
) {
2091
239k
      SavedRegs.set(PairedReg);
2092
239k
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
2093
239k
          
!RegInfo->isReservedReg(MF, PairedReg)235k
)
2094
44.8k
        ExtraCSSpill = PairedReg;
2095
239k
    }
2096
1.30M
  }
2097
279k
2098
279k
  // Calculates the callee saved stack size.
2099
279k
  unsigned CSStackSize = 0;
2100
279k
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2101
279k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
2102
279k
  for (unsigned Reg : SavedRegs.set_bits())
2103
1.30M
    CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
2104
279k
2105
279k
  // Save number of saved regs, so we can easily update CSStackSize later.
2106
279k
  unsigned NumSavedRegs = SavedRegs.count();
2107
279k
2108
279k
  // The frame record needs to be created by saving the appropriate registers
2109
279k
  unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
2110
279k
  if (hasFP(MF) ||
2111
279k
      
windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)91.6k
) {
2112
188k
    SavedRegs.set(AArch64::FP);
2113
188k
    SavedRegs.set(AArch64::LR);
2114
188k
  }
2115
279k
2116
279k
  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
2117
279k
             for (unsigned Reg
2118
279k
                  : SavedRegs.set_bits()) dbgs()
2119
279k
             << ' ' << printReg(Reg, RegInfo);
2120
279k
             dbgs() << "\n";);
2121
279k
2122
279k
  // If any callee-saved registers are used, the frame cannot be eliminated.
2123
279k
  bool CanEliminateFrame = SavedRegs.count() == 0;
2124
279k
2125
279k
  // The CSR spill slots have not been allocated yet, so estimateStackSize
2126
279k
  // won't include them.
2127
279k
  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2128
279k
  bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2129
279k
  if (BigStack || 
!CanEliminateFrame269k
||
RegInfo->cannotEliminateFrame(MF)81.4k
)
2130
198k
    AFI->setHasStackFrame(true);
2131
279k
2132
279k
  // Estimate if we might need to scavenge a register at some point in order
2133
279k
  // to materialize a stack offset. If so, either spill one additional
2134
279k
  // callee-saved register or reserve a special spill slot to facilitate
2135
279k
  // register scavenging. If we already spilled an extra callee-saved register
2136
279k
  // above to keep the number of spills even, we don't need to do anything else
2137
279k
  // here.
2138
279k
  if (BigStack) {
2139
10.6k
    if (!ExtraCSSpill && 
UnspilledCSGPR != AArch64::NoRegister7.43k
) {
2140
984
      LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2141
984
                        << " to get a scratch register.\n");
2142
984
      SavedRegs.set(UnspilledCSGPR);
2143
984
      // MachO's compact unwind format relies on all registers being stored in
2144
984
      // pairs, so if we need to spill one extra for BigStack, then we need to
2145
984
      // store the pair.
2146
984
      if (produceCompactUnwindFrame(MF))
2147
923
        SavedRegs.set(UnspilledCSGPRPaired);
2148
984
      ExtraCSSpill = UnspilledCSGPRPaired;
2149
984
    }
2150
10.6k
2151
10.6k
    // If we didn't find an extra callee-saved register to spill, create
2152
10.6k
    // an emergency spill slot.
2153
10.6k
    if (!ExtraCSSpill || 
MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)4.24k
) {
2154
6.45k
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2155
6.45k
      const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2156
6.45k
      unsigned Size = TRI->getSpillSize(RC);
2157
6.45k
      unsigned Align = TRI->getSpillAlignment(RC);
2158
6.45k
      int FI = MFI.CreateStackObject(Size, Align, false);
2159
6.45k
      RS->addScavengingFrameIndex(FI);
2160
6.45k
      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2161
6.45k
                        << " as the emergency spill slot.\n");
2162
6.45k
    }
2163
10.6k
  }
2164
279k
2165
279k
  // Adding the size of additional 64bit GPR saves.
2166
279k
  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2167
279k
  unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
2168
279k
  LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2169
279k
               << EstimatedStackSize + AlignedCSStackSize
2170
279k
               << " bytes.\n");
2171
279k
2172
279k
  // Round up to register pair alignment to avoid additional SP adjustment
2173
279k
  // instructions.
2174
279k
  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
2175
279k
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
2176
279k
}
2177
2178
bool AArch64FrameLowering::enableStackSlotScavenging(
2179
32.7k
    const MachineFunction &MF) const {
2180
32.7k
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2181
32.7k
  return AFI->hasCalleeSaveStackFreeSpace();
2182
32.7k
}
2183
2184
// For functions using Win64-style C++ EH, allocate the UnwindHelp stack
// object and initialize it to -2 immediately after the prologue, as required
// by the Windows EH runtime.  No-op for all other functions.
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // If this function isn't doing Win64-style C++ EH, we don't need to do
  // anything.
  if (!MF.hasEHFunclets())
    return;
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();

  // Skip past the FrameSetup instructions of the entry block: the UnwindHelp
  // store must be inserted after the prologue is complete.
  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Create an UnwindHelp object.
  int UnwindHelpFI =
      MFI.CreateStackObject(/*size*/ 8, /*alignment*/ 16, false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
  // We need to store -2 into the UnwindHelp object at the start of the
  // function.
  DebugLoc DL;
  // Scavenge a GPR that is unused at the post-prologue point to hold the -2
  // immediate; scanning backward from the block end keeps liveness accurate.
  RS->enterBasicBlockEnd(MBB);
  RS->backward(std::prev(MBBI));
  unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
  assert(DstReg && "There must be a free register after frame setup");
  // Materialize -2, then store it (killing the scratch register) into the
  // UnwindHelp slot.
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
      .addReg(DstReg, getKillRegState(true))
      .addFrameIndex(UnwindHelpFI)
      .addImm(0);
}
2216
2217
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2218
/// the update.  This is easily retrieved as it is exactly the offset that is set
2219
/// in processFunctionBeforeFrameFinalized.
2220
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2221
    const MachineFunction &MF, int FI, unsigned &FrameReg,
2222
10
    bool IgnoreSPUpdates) const {
2223
10
  const MachineFrameInfo &MFI = MF.getFrameInfo();
2224
10
  LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2225
10
                    << MFI.getObjectOffset(FI) << "\n");
2226
10
  FrameReg = AArch64::SP;
2227
10
  return MFI.getObjectOffset(FI);
2228
10
}
2229
2230
/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
    const MachineFunction &MF) const {
  // Not needed on AArch64, so always report a zero displacement.
  return 0;
}
2236
2237
/// Funclets only need to account for space for the callee saved registers,
2238
/// as the locals are accounted for in the parent's stack frame.
2239
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2240
24
    const MachineFunction &MF) const {
2241
24
  // This is the size of the pushed CSRs.
2242
24
  unsigned CSSize =
2243
24
      MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2244
24
  // This is the amount of stack a funclet needs to allocate.
2245
24
  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2246
24
                 getStackAlignment());
2247
24
}