//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   unsigned StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified.  Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->needsStackRealignment(MF) ||
          MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          MFI.hasCopyImplyingStackAdjustment());
}

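// Illustrative example (editorial addition, not in the original source):
// any single predicate above is enough to force a frame pointer. A function
// containing a variable-length array, e.g. the hypothetical
//
//   void f(unsigned n) { char buf[n]; g(buf); }
//
// produces a frame with MFI.hasVarSizedObjects() == true, so hasFP(MF)
// returns true even when frame-pointer elimination is otherwise enabled.
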
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getSUBrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(unsigned isLP64) {
  return isLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::AND64ri8;
    return X86::AND64ri32;
  }
  if (isInt<8>(Imm))
    return X86::AND32ri8;
  return X86::AND32ri;
}

static unsigned getLEArOpcode(unsigned IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

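// Illustrative example (editorial addition, not in the original source):
// the ri8 forms carry a sign-extended 8-bit immediate, so small adjustments
// get the short encoding and everything else falls back to the
// 32-bit-immediate form:
//
//   getSUBriOpcode(/*IsLP64=*/true, 8);    // X86::SUB64ri8
//   getSUBriOpcode(/*IsLP64=*/true, 4096); // X86::SUB64ri32
//
// The byte-immediate form saves three bytes of encoding per adjustment.
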
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const X86RegisterInfo *TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default: return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
          CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case if EFLAGS is live-in of the region composed
/// by the terminators, or live-out of that region without being defined
/// by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL,
                                    int64_t NumBytes, bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);

    unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        unsigned Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

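// Illustrative example (editorial addition, not in the original source):
// a prologue request of NumBytes = -40 on x86-64 normally collapses to a
// single instruction,
//
//   subq $40, %rsp
//
// while a slot-sized request (NumBytes = -8) prefers "pushq %rax" as a
// smaller encoding, and offsets beyond 2GB take the register-materialization
// paths above.
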
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require using LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

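// Illustrative example (editorial addition, not in the original source):
// the LEA form matters because LEA does not write EFLAGS. With an epilogue
// adjustment placed between a compare and its branch,
//
//   cmpl %eax, %ecx
//   leaq 16(%rsp), %rsp    # flags from the cmp survive
//   jne  .LBB0_2
//
// an "addq $16, %rsp" in the same slot would clobber the comparison result,
// which is exactly what flagsNeedToBePreservedBeforeTheTerminators guards
// against.
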
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}

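// Illustrative example (editorial addition, not in the original source):
// with doMergeWithPrevious == true and MBBI pointing just past
//
//   subq $16, %rsp
//
// mergeSPUpdates erases the SUB and returns -16; emitPrologue then folds it
// via "NumBytes -= mergeSPUpdates(...)", growing the main allocation by 16
// bytes instead of emitting two separate adjustments.
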
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);
  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
  }
}

void X86FrameLowering::emitStackProbe(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
  }
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  const StringRef ChkStkStubSymbol = "__chkstk_stub";
  MachineInstr *ChkStkStub = nullptr;

  for (MachineInstr &MI : PrologMBB) {
    if (MI.isCall() && MI.getOperand(0).isSymbol() &&
        ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
      ChkStkStub = &MI;
      break;
    }
  }

  if (ChkStkStub != nullptr) {
    assert(!ChkStkStub->isBundled() &&
           "Not expecting bundled instructions here");
    MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
    assert(std::prev(MBBI) == ChkStkStub &&
           "MBBI expected after __chkstk_stub.");
    DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
    emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
    ChkStkStub->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register SizeReg = InProlog ? X86::RAX
                                    : MRI.createVirtualRegister(RegClass),
                 ZeroReg = InProlog ? X86::RCX
                                    : MRI.createVirtualRegister(RegClass),
                 CopyReg = InProlog ? X86::RDX
                                    : MRI.createVirtualRegister(RegClass),
                 TestReg = InProlog ? X86::RDX
                                    : MRI.createVirtualRegister(RegClass),
                 FinalReg = InProlog ? X86::RDX
                                     : MRI.createVirtualRegister(RegClass),
                 RoundedReg = InProlog ? X86::RDX
                                       : MRI.createVirtualRegister(RegClass),
                 LimitReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass),
                 JoinReg = InProlog ? X86::RCX
                                    : MRI.createVirtualRegister(RegClass),
                 ProbeReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
         CMBBI != ContinueMBBI; ++CMBBI) {
      CMBBI->setFlag(MachineInstr::FrameSetup);
    }
  }
}

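// Illustrative sketch (editorial addition, not in the original source):
// in the prolog case the expansion above boils down to roughly the
// following sequence, with the registers bound to RAX/RCX/RDX and
// illustrative label names:
//
//   xorq   %rcx, %rcx
//   movq   %rsp, %rdx
//   subq   %rax, %rdx
//   cmovbq %rcx, %rdx          # clamp to zero on overflow
//   movq   %gs:0x10, %rcx      # thread environment stack limit
//   cmpq   %rcx, %rdx
//   jae    .Lcontinue
//   andq   $-4096, %rdx        # round down to a page boundary
// .Lloop:
//   leaq   -4096(%rcx), %rcx
//   movb   $0, (%rcx)          # touch the page
//   cmpq   %rcx, %rdx
//   jne    .Lloop
// .Lcontinue:
//   subq   %rax, %rsp
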
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL,
                                          bool InProlog) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add retpoline support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and retpoline not yet implemented.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
        .addReg(SP)
        .addReg(AX);
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

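// Illustrative example (editorial addition, not in the original source):
// on Win64 a 0x5000-byte frame is allocated as
//
//   movl  $0x5000, %eax
//   callq __chkstk
//   subq  %rax, %rsp
//
// because the 64-bit probe routine only touches the guard pages and leaves
// the actual stack-pointer adjustment to the prologue.
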
void X86FrameLowering::emitStackProbeInlineStub(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {

  assert(InProlog && "ChkStkStub called outside prolog!");

  BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
      .addExternalSymbol("__chkstk_stub");
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}

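// Illustrative example (editorial addition, not in the original source):
// for SPAdjust = 40 this returns min(40, 128) & -16 = 32, so the frame
// register is established 32 bytes above RSP; any adjustment of 128 or more
// saturates at 128, which is already 16-byte aligned.
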
// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out.  Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment.
  unsigned StackAlign = getStackAlignment();
  if (MF.getFunction().hasFnAttribute("stackrealign")) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }
  return MaxAlign;
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                         .addReg(Reg)
                         .addImm(Val)
                         .setMIFlag(MachineInstr::FrameSetup);

  // The EFLAGS implicit def is dead.
  MI->getOperand(3).setIsDead();
}

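// Illustrative example (editorial addition, not in the original source):
// with MaxAlign = 32 the helper above emits
//
//   andq $-32, %rsp
//
// rounding RSP down to the next 32-byte boundary (e.g. 0x7fff1238 to
// 0x7fff1220) and creating the "re-alignment gap" described in the
// emitPrologue gist below.
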
bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

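// Illustrative example (editorial addition, not in the original source):
// on SysV x86-64 a leaf function with, say, 40 bytes of locals may address
// them at -40(%rsp) inside the red zone and skip the RSP adjustment
// entirely; emitPrologue uses this predicate to shrink StackSize by up to
// 128 bytes in that situation.
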
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
       .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
972
137k
                                    MachineBasicBlock &MBB) const {
973
137k
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
974
137k
         "MF used frame lowering for wrong subtarget");
975
137k
  MachineBasicBlock::iterator MBBI = MBB.begin();
976
137k
  MachineFrameInfo &MFI = MF.getFrameInfo();
977
137k
  const Function &Fn = MF.getFunction();
978
137k
  MachineModuleInfo &MMI = MF.getMMI();
979
137k
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
980
137k
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
981
137k
  uint64_t StackSize = MFI.getStackSize();    // Number of bytes to allocate.
982
137k
  bool IsFunclet = MBB.isEHFuncletEntry();
983
137k
  EHPersonality Personality = EHPersonality::Unknown;
984
137k
  if (Fn.hasPersonalityFn())
985
432
    Personality = classifyEHPersonality(Fn.getPersonalityFn());
986
137k
  bool FnHasClrFunclet =
987
137k
      MF.hasEHFunclets() && 
Personality == EHPersonality::CoreCLR206
;
988
137k
  bool IsClrFunclet = IsFunclet && 
FnHasClrFunclet118
;
989
137k
  bool HasFP = hasFP(MF);
990
137k
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
991
137k
  bool NeedsWin64CFI = IsWin64Prologue && 
Fn.needsUnwindTableEntry()1.81k
;
992
137k
  // FIXME: Emit FPO data for EH funclets.
993
137k
  bool NeedsWinFPO =
994
137k
      !IsFunclet && 
STI.isTargetWin32()137k
&&
MMI.getModule()->getCodeViewFlag()1.16k
;
995
137k
  bool NeedsWinCFI = NeedsWin64CFI || 
NeedsWinFPO136k
;
996
137k
  bool NeedsDwarfCFI =
997
137k
      !IsWin64Prologue && 
(136k
MMI.hasDebugInfo()136k
||
Fn.needsUnwindTableEntry()113k
);
998
137k
  unsigned FramePtr = TRI->getFrameRegister(MF);
999
137k
  const unsigned MachineFramePtr =
1000
137k
      STI.isTarget64BitILP32()
1001
137k
          ? 
getX86SubSuperRegister(FramePtr, 64)195
:
FramePtr137k
;
1002
137k
  unsigned BasePtr = TRI->getBaseRegister();
1003
137k
  bool HasWinCFI = false;
1004
137k
1005
137k
  // Debug location must be unknown since the first debug location is used
1006
137k
  // to determine the end of the prologue.
1007
137k
  DebugLoc DL;
1008
137k
1009
137k
  // Add RETADDR move area to callee saved frame size.
1010
137k
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1011
137k
  if (TailCallReturnAddrDelta && 
IsWin64Prologue8
)
1012
0
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1013
137k
1014
137k
  if (TailCallReturnAddrDelta < 0)
1015
8
    X86FI->setCalleeSavedFrameSize(
1016
8
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
1017
137k
1018
137k
  bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
1019
137k
1020
137k
  // The default stack probe size is 4096 if the function has no stackprobesize
1021
137k
  // attribute.
1022
137k
  unsigned StackProbeSize = 4096;
1023
137k
  if (Fn.hasFnAttribute("stack-probe-size"))
1024
8
    Fn.getFnAttribute("stack-probe-size")
1025
8
        .getValueAsString()
1026
8
        .getAsInteger(0, StackProbeSize);
1027
137k
1028
137k
  // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1029
137k
  // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1030
137k
  // stack alignment.
1031
137k
  if (Fn.getCallingConv() == CallingConv::X86_INTR && 
Is64Bit39
&&
1032
137k
      
Fn.arg_size() == 221
) {
1033
9
    StackSize += 8;
1034
9
    MFI.setStackSize(StackSize);
1035
9
    emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
1036
9
  }
1037
137k
1038
137k
  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1039
137k
  // function, and use up to 128 bytes of stack space, don't have a frame
1040
137k
  // pointer, calls, or dynamic alloca then we do not need to adjust the
1041
137k
  // stack pointer (we fit in the Red Zone). We also check that we don't
1042
137k
  // push and pop from the stack.
1043
137k
  if (has128ByteRedZone(MF) &&
1044
137k
      
!TRI->needsStackRealignment(MF)108k
&&
1045
137k
      
!MFI.hasVarSizedObjects()108k
&& // No dynamic alloca.
1046
137k
      
!MFI.adjustsStack()108k
&& // No calls.
1047
137k
      
!UseStackProbe89.9k
&& // No stack probes.
1048
137k
      
!MFI.hasCopyImplyingStackAdjustment()89.9k
&& // Don't push and pop.
1049
137k
      
!MF.shouldSplitStack()89.9k
) { // Regular stack
1050
89.9k
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
1051
89.9k
    if (HasFP) 
MinSize += SlotSize4.25k
;
1052
89.9k
    X86FI->setUsesRedZone(MinSize > 0 || 
StackSize > 085.2k
);
1053
89.9k
    StackSize = std::max(MinSize, StackSize > 128 ? 
StackSize - 12882
:
089.8k
);
1054
89.9k
    MFI.setStackSize(StackSize);
1055
89.9k
  }
1056
137k
1057
137k
  // Insert stack pointer adjustment for later moving of return addr.  Only
1058
137k
  // applies to tail call optimized functions where the callee argument stack
1059
137k
  // size is bigger than the callers.
1060
137k
  if (TailCallReturnAddrDelta < 0) {
1061
8
    BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
1062
8
                         /*InEpilogue=*/false)
1063
8
        .setMIFlag(MachineInstr::FrameSetup);
1064
8
  }
1065
137k
1066
137k
  // Mapping for machine moves:
1067
137k
  //
1068
137k
  //   DST: VirtualFP AND
1069
137k
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
1070
137k
  //        ELSE                        => DW_CFA_def_cfa
1071
137k
  //
1072
137k
  //   SRC: VirtualFP AND
1073
137k
  //        DST: Register               => DW_CFA_def_cfa_register
1074
137k
  //
1075
137k
  //   ELSE
1076
137k
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
1077
137k
  //        REG < 64                    => DW_CFA_offset + Reg
1078
137k
  //        ELSE                        => DW_CFA_offset_extended
1079
137k
1080
137k
  uint64_t NumBytes = 0;
1081
137k
  int stackGrowth = -SlotSize;
1082
137k
1083
137k
  // Find the funclet establisher parameter
1084
137k
  unsigned Establisher = X86::NoRegister;
1085
137k
  if (IsClrFunclet)
1086
19
    Establisher = Uses64BitFramePtr ? X86::RCX : 
X86::ECX0
;
1087
137k
  else if (IsFunclet)
1088
99
    Establisher = Uses64BitFramePtr ? 
X86::RDX67
:
X86::EDX32
;
1089
137k
1090
137k
  if (IsWin64Prologue && 
IsFunclet1.81k
&&
!IsClrFunclet86
) {
1091
67
    // Immediately spill establisher into the home slot.
1092
67
    // The runtime cares about this.
1093
67
    // MOV64mr %rdx, 16(%rsp)
1094
67
    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : 
X86::MOV32mr0
;
1095
67
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1096
67
        .addReg(Establisher)
1097
67
        .setMIFlag(MachineInstr::FrameSetup);
1098
67
    MBB.addLiveIn(Establisher);
1099
67
  }
1100
137k
1101
137k
  if (HasFP) {
1102
23.7k
    assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1103
23.7k
1104
23.7k
    // Calculate required stack adjustment.
1105
23.7k
    uint64_t FrameSize = StackSize - SlotSize;
1106
23.7k
    // If required, include space for extra hidden slot for stashing base pointer.
1107
23.7k
    if (X86FI->getRestoreBasePointer())
1108
2
      FrameSize += SlotSize;
1109
23.7k
1110
23.7k
    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
1111
23.7k
1112
23.7k
    // Callee-saved registers are pushed on stack before the stack is realigned.
1113
23.7k
    if (TRI->needsStackRealignment(MF) && 
!IsWin64Prologue1.45k
)
1114
1.42k
      NumBytes = alignTo(NumBytes, MaxAlign);
1115
23.7k
1116
23.7k
    // Save EBP/RBP into the appropriate stack slot.
1117
23.7k
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::PUSH64r19.3k
:
X86::PUSH32r4.36k
))
1118
23.7k
      .addReg(MachineFramePtr, RegState::Kill)
1119
23.7k
      .setMIFlag(MachineInstr::FrameSetup);
1120
23.7k
1121
23.7k
    if (NeedsDwarfCFI) {
1122
22.6k
      // Mark the place where EBP/RBP was saved.
1123
22.6k
      // Define the current CFA rule to use the provided offset.
1124
22.6k
      assert(StackSize);
1125
22.6k
      BuildCFI(MBB, MBBI, DL,
1126
22.6k
               MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
1127
22.6k
1128
22.6k
      // Change the rule for the FramePtr to be an "offset" rule.
1129
22.6k
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1130
22.6k
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
1131
22.6k
                                  nullptr, DwarfFramePtr, 2 * stackGrowth));
1132
22.6k
    }
1133
23.7k
1134
23.7k
    if (NeedsWinCFI) {
1135
241
      HasWinCFI = true;
1136
241
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1137
241
          .addImm(FramePtr)
1138
241
          .setMIFlag(MachineInstr::FrameSetup);
1139
241
    }
1140
23.7k
1141
23.7k
    if (!IsWin64Prologue && 
!IsFunclet23.5k
) {
1142
23.5k
      // Update EBP with the new base value.
1143
23.5k
      BuildMI(MBB, MBBI, DL,
1144
23.5k
              TII.get(Uses64BitFramePtr ? 
X86::MOV64rr19.1k
:
X86::MOV32rr4.35k
),
1145
23.5k
              FramePtr)
1146
23.5k
          .addReg(StackPtr)
1147
23.5k
          .setMIFlag(MachineInstr::FrameSetup);
1148
23.5k
1149
23.5k
      if (NeedsDwarfCFI) {
1150
22.6k
        // Mark effective beginning of when frame pointer becomes valid.
1151
22.6k
        // Define the current CFA to use the EBP/RBP register.
1152
22.6k
        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1153
22.6k
        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
1154
22.6k
                                    nullptr, DwarfFramePtr));
1155
22.6k
      }
1156
23.5k
1157
23.5k
      if (NeedsWinFPO) {
1158
60
        // .cv_fpo_setframe $FramePtr
1159
60
        HasWinCFI = true;
1160
60
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1161
60
            .addImm(FramePtr)
1162
60
            .addImm(0)
1163
60
            .setMIFlag(MachineInstr::FrameSetup);
1164
60
      }
1165
23.5k
    }
1166
114k
  } else {
1167
114k
    assert(!IsFunclet && "funclets without FPs not yet implemented");
1168
114k
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
1169
114k
  }
1170
137k
1171
137k
  // Update the offset adjustment, which is mainly used by codeview to translate
1172
137k
  // from ESP to VFRAME relative local variable offsets.
1173
137k
  if (!IsFunclet) {
1174
137k
    if (HasFP && 
TRI->needsStackRealignment(MF)23.6k
)
1175
1.45k
      MFI.setOffsetAdjustment(-NumBytes);
1176
136k
    else
1177
136k
      MFI.setOffsetAdjustment(-StackSize);
1178
137k
  }
1179
137k
1180
137k
  // For EH funclets, only allocate enough space for outgoing calls. Save the
1181
137k
  // NumBytes value that we would've used for the parent frame.
1182
137k
  unsigned ParentFrameNumBytes = NumBytes;
1183
137k
  if (IsFunclet)
1184
118
    NumBytes = getWinEHFuncletFrameSize(MF);
1185
137k
1186
137k
  // Skip the callee-saved push instructions.
1187
137k
  bool PushedRegs = false;
1188
137k
  int StackOffset = 2 * stackGrowth;
1189
137k
1190
194k
  while (MBBI != MBB.end() &&
1191
194k
         
MBBI->getFlag(MachineInstr::FrameSetup)194k
&&
1192
194k
         
(56.8k
MBBI->getOpcode() == X86::PUSH32r56.8k
||
1193
56.8k
          
MBBI->getOpcode() == X86::PUSH64r47.2k
)) {
1194
56.7k
    PushedRegs = true;
1195
56.7k
    unsigned Reg = MBBI->getOperand(0).getReg();
1196
56.7k
    ++MBBI;
1197
56.7k
1198
56.7k
    if (!HasFP && 
NeedsDwarfCFI8.13k
) {
1199
3.70k
      // Mark callee-saved push instruction.
1200
3.70k
      // Define the current CFA rule to use the provided offset.
1201
3.70k
      assert(StackSize);
1202
3.70k
      BuildCFI(MBB, MBBI, DL,
1203
3.70k
               MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
1204
3.70k
      StackOffset += stackGrowth;
1205
3.70k
    }
1206
56.7k
1207
56.7k
    if (NeedsWinCFI) {
1208
342
      HasWinCFI = true;
1209
342
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1210
342
          .addImm(Reg)
1211
342
          .setMIFlag(MachineInstr::FrameSetup);
1212
342
    }
1213
56.7k
  }
1214
137k
1215
137k
  // Realign stack after we pushed callee-saved registers (so that we'll be
1216
137k
  // able to calculate their offsets from the frame pointer).
1217
137k
  // Don't do this for Win64, it needs to realign the stack after the prologue.
1218
137k
  if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
1219
1.42k
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
1220
1.42k
    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1221
1.42k
1222
1.42k
    if (NeedsWinCFI) {
1223
7
      HasWinCFI = true;
1224
7
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1225
7
          .addImm(MaxAlign)
1226
7
          .setMIFlag(MachineInstr::FrameSetup);
1227
7
    }
1228
1.42k
  }
1229
137k
1230
137k
  // If there is a SUB32ri of ESP immediately before this instruction, merge
1231
137k
  // the two. This can be the case when tail call elimination is enabled and
1232
137k
  // the callee has more arguments than the caller.
1233
137k
  NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1234
137k
1235
137k
  // Adjust stack pointer: ESP -= numbytes.
1236
137k
1237
137k
  // Windows and cygwin/mingw require a prologue helper routine when allocating
1238
137k
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
1239
137k
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
1240
137k
  // stack and adjust the stack pointer in one go.  The 64-bit version of
1241
137k
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
1242
137k
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
1243
137k
  // increments is necessary to ensure that the guard pages used by the OS
1244
137k
  // virtual memory manager are allocated in correct sequence.
1245
137k
  uint64_t AlignedNumBytes = NumBytes;
1246
137k
  if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
1247
29
    AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1248
137k
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
1249
53
    assert(!X86FI->getUsesRedZone() &&
1250
53
           "The Red Zone is not accounted for in stack probes");
1251
53
1252
53
    // Check whether EAX is livein for this block.
1253
53
    bool isEAXAlive = isEAXLiveIn(MBB);
1254
53
1255
53
    if (isEAXAlive) {
1256
3
      if (Is64Bit) {
1257
0
        // Save RAX
1258
0
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1259
0
          .addReg(X86::RAX, RegState::Kill)
1260
0
          .setMIFlag(MachineInstr::FrameSetup);
1261
3
      } else {
1262
3
        // Save EAX
1263
3
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1264
3
          .addReg(X86::EAX, RegState::Kill)
1265
3
          .setMIFlag(MachineInstr::FrameSetup);
1266
3
      }
1267
3
    }
1268
53
1269
53
    if (Is64Bit) {
1270
33
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1271
33
      // Function prologue is responsible for adjusting the stack pointer.
1272
33
      int Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1273
33
      if (isUInt<32>(Alloc)) {
1274
33
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1275
33
            .addImm(Alloc)
1276
33
            .setMIFlag(MachineInstr::FrameSetup);
1277
33
      } else if (isInt<32>(Alloc)) {
1278
0
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
1279
0
            .addImm(Alloc)
1280
0
            .setMIFlag(MachineInstr::FrameSetup);
1281
0
      } else {
1282
0
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
1283
0
            .addImm(Alloc)
1284
0
            .setMIFlag(MachineInstr::FrameSetup);
1285
0
      }
1286
33
    } else {
1287
20
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1288
20
      // We'll also use 4 already allocated bytes for EAX.
1289
20
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1290
20
          .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1291
20
          .setMIFlag(MachineInstr::FrameSetup);
1292
20
    }
1293
53
1294
53
    // Call __chkstk, __chkstk_ms, or __alloca.
1295
53
    emitStackProbe(MF, MBB, MBBI, DL, true);
1296
53
1297
53
    if (isEAXAlive) {
1298
3
      // Restore RAX/EAX
1299
3
      MachineInstr *MI;
1300
3
      if (Is64Bit)
1301
0
        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
1302
0
                          StackPtr, false, NumBytes - 8);
1303
3
      else
1304
3
        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
1305
3
                          StackPtr, false, NumBytes - 4);
1306
3
      MI->setFlag(MachineInstr::FrameSetup);
1307
3
      MBB.insert(MBBI, MI);
1308
3
    }
1309
137k
  } else if (NumBytes) {
1310
21.5k
    emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
1311
21.5k
  }
1312
137k
1313
137k
  if (NeedsWinCFI && NumBytes) {
1314
699
    HasWinCFI = true;
1315
699
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
1316
699
        .addImm(NumBytes)
1317
699
        .setMIFlag(MachineInstr::FrameSetup);
1318
699
  }
1319
137k
1320
137k
  int SEHFrameOffset = 0;
1321
137k
  unsigned SPOrEstablisher;
1322
137k
  if (IsFunclet) {
1323
118
    if (IsClrFunclet) {
1324
19
      // The establisher parameter passed to a CLR funclet is actually a pointer
1325
19
      // to the (mostly empty) frame of its nearest enclosing funclet; we have
1326
19
      // to find the root function establisher frame by loading the PSPSym from
1327
19
      // the intermediate frame.
1328
19
      unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1329
19
      MachinePointerInfo NoInfo;
1330
19
      MBB.addLiveIn(Establisher);
1331
19
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
1332
19
                   Establisher, false, PSPSlotOffset)
1333
19
          .addMemOperand(MF.getMachineMemOperand(
1334
19
              NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
1335
19
      ;
1336
19
      // Save the root establisher back into the current funclet's (mostly
1337
19
      // empty) frame, in case a sub-funclet or the GC needs it.
1338
19
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
1339
19
                   false, PSPSlotOffset)
1340
19
          .addReg(Establisher)
1341
19
          .addMemOperand(
1342
19
              MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
1343
19
                                                  MachineMemOperand::MOVolatile,
1344
19
                                      SlotSize, SlotSize));
1345
19
    }
1346
118
    SPOrEstablisher = Establisher;
1347
137k
  } else {
1348
137k
    SPOrEstablisher = StackPtr;
1349
137k
  }
1350
137k
1351
137k
  if (IsWin64Prologue && HasFP) {
1352
214
    // Set RBP to a small fixed offset from RSP. In the funclet case, we base
1353
214
    // this calculation on the incoming establisher, which holds the value of
1354
214
    // RSP from the parent frame at the end of the prologue.
1355
214
    SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
1356
214
    if (SEHFrameOffset)
1357
194
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
1358
194
                   SPOrEstablisher, false, SEHFrameOffset);
1359
20
    else
1360
20
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
1361
20
          .addReg(SPOrEstablisher);
1362
214
1363
214
    // If this is not a funclet, emit the CFI describing our frame pointer.
1364
214
    if (NeedsWinCFI && !IsFunclet) {
1365
95
      assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
1366
95
      HasWinCFI = true;
1367
95
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1368
95
          .addImm(FramePtr)
1369
95
          .addImm(SEHFrameOffset)
1370
95
          .setMIFlag(MachineInstr::FrameSetup);
1371
95
      if (isAsynchronousEHPersonality(Personality))
1372
12
        MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
1373
95
    }
1374
137k
  } else if (IsFunclet && STI.is32Bit()) {
1375
32
    // Reset EBP / ESI to something good for funclets.
1376
32
    MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
1377
32
    // If we're a catch funclet, we can be returned to via catchret. Save ESP
1378
32
    // into the registration node so that the runtime will restore it for us.
1379
32
    if (!MBB.isCleanupFuncletEntry()) {
1380
21
      assert(Personality == EHPersonality::MSVC_CXX);
1381
21
      unsigned FrameReg;
1382
21
      int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
1383
21
      int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
1384
21
      // ESP is the first field, so no extra displacement is needed.
1385
21
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
1386
21
                   false, EHRegOffset)
1387
21
          .addReg(X86::ESP);
1388
21
    }
1389
32
  }
1390
137k
1391
138k
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
1392
756
    const MachineInstr &FrameInstr = *MBBI;
1393
756
    ++MBBI;
1394
756
1395
756
    if (NeedsWinCFI) {
1396
190
      int FI;
1397
190
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
1398
190
        if (X86::FR64RegClass.contains(Reg)) {
1399
190
          unsigned IgnoredFrameReg;
1400
190
          int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
1401
190
          Offset += SEHFrameOffset;
1402
190
1403
190
          HasWinCFI = true;
1404
190
          assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
1405
190
          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
1406
190
              .addImm(Reg)
1407
190
              .addImm(Offset)
1408
190
              .setMIFlag(MachineInstr::FrameSetup);
1409
190
        }
1410
190
      }
1411
190
    }
1412
756
  }
1413
137k
1414
137k
  if (NeedsWinCFI && HasWinCFI)
1415
751
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
1416
751
        .setMIFlag(MachineInstr::FrameSetup);
1417
137k
1418
137k
  if (FnHasClrFunclet && !IsFunclet) {
1419
7
    // Save the so-called Initial-SP (i.e. the value of the stack pointer
1420
7
    // immediately after the prolog)  into the PSPSlot so that funclets
1421
7
    // and the GC can recover it.
1422
7
    unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1423
7
    auto PSPInfo = MachinePointerInfo::getFixedStack(
1424
7
        MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
1425
7
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
1426
7
                 PSPSlotOffset)
1427
7
        .addReg(StackPtr)
1428
7
        .addMemOperand(MF.getMachineMemOperand(
1429
7
            PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
1430
7
            SlotSize, SlotSize));
1431
7
  }
1432
137k
1433
137k
  // Realign stack after we spilled callee-saved registers (so that we'll be
1434
137k
  // able to calculate their offsets from the frame pointer).
1435
137k
  // Win64 requires aligning the stack after the prologue.
1436
137k
  if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
1437
30
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
1438
30
    BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
1439
30
  }
1440
137k
1441
137k
  // We already dealt with stack realignment and funclets above.
1442
137k
  if (IsFunclet && STI.is32Bit())
1443
32
    return;
1444
137k
1445
137k
  // If we need a base pointer, set it up here. It's whatever the value
1446
137k
  // of the stack pointer is at this point. Any variable size objects
1447
137k
  // will be allocated after this, so we can still use the base pointer
1448
137k
  // to reference locals.
1449
137k
  if (TRI->hasBasePointer(MF)) {
1450
75
    // Update the base pointer with the current stack pointer.
1451
75
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
1452
75
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
1453
75
      .addReg(SPOrEstablisher)
1454
75
      .setMIFlag(MachineInstr::FrameSetup);
1455
75
    if (X86FI->getRestoreBasePointer()) {
1456
2
      // Stash value of base pointer.  Saving RSP instead of EBP shortens
1457
2
      // dependence chain. Used by SjLj EH.
1458
2
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1459
2
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
1460
2
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
1461
2
        .addReg(SPOrEstablisher)
1462
2
        .setMIFlag(MachineInstr::FrameSetup);
1463
2
    }
1464
75
1465
75
    if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
1466
5
      // Stash the value of the frame pointer relative to the base pointer for
1467
5
      // Win32 EH. This supports Win32 EH, which does the inverse of the above:
1468
5
      // it recovers the frame pointer from the base pointer rather than the
1469
5
      // other way around.
1470
5
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1471
5
      unsigned UsedReg;
1472
5
      int Offset =
1473
5
          getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
1474
5
      assert(UsedReg == BasePtr);
1475
5
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
1476
5
          .addReg(FramePtr)
1477
5
          .setMIFlag(MachineInstr::FrameSetup);
1478
5
    }
1479
75
  }
1480
137k
1481
137k
  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
1482
18.9k
    // Mark end of stack pointer adjustment.
1483
18.9k
    if (!HasFP && NumBytes) {
1484
3.73k
      // Define the current CFA rule to use the provided offset.
1485
3.73k
      assert(StackSize);
1486
3.73k
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
1487
3.73k
                                  nullptr, -StackSize + stackGrowth));
1488
3.73k
    }
1489
18.9k
1490
18.9k
    // Emit DWARF info specifying the offsets of the callee-saved registers.
1491
18.9k
    emitCalleeSavedFrameMoves(MBB, MBBI, DL);
1492
18.9k
  }
1493
137k
1494
137k
  // X86 Interrupt handling function cannot assume anything about the direction
1495
137k
  // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
1496
137k
  // in each prologue of interrupt handler function.
1497
137k
  //
1498
137k
  // FIXME: Create "cld" instruction only in these cases:
1499
137k
  // 1. The interrupt handling function uses any of the "rep" instructions.
1500
137k
  // 2. Interrupt handling function calls another function.
1501
137k
  //
1502
137k
  if (Fn.getCallingConv() == CallingConv::X86_INTR)
1503
39
    BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
1504
39
        .setMIFlag(MachineInstr::FrameSetup);
1505
137k
1506
137k
  // At this point we know if the function has WinCFI or not.
1507
137k
  MF.setHasWinCFI(HasWinCFI);
1508
137k
}
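A note on the Windows stack-probe path in emitPrologue above: the allocation size is loaded into EAX/RAX with the narrowest MOV encoding that fits. Below is a minimal standalone sketch of that three-way dispatch; isUInt<32>/isInt<32> are re-derived locally for the example, and the opcode names simply echo the X86 instruction definitions used in the function.

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Sketch of the immediate-width choice made before calling __chkstk above;
    // the helper stands in for llvm::isUInt<32>/isInt<32>.
    static const char *movOpcodeFor(uint64_t Alloc) {
      if (Alloc <= UINT32_MAX)
        return "MOV32ri";   // writing EAX implicitly zero-extends into RAX
      if ((int64_t)Alloc >= INT32_MIN && (int64_t)Alloc <= INT32_MAX)
        return "MOV64ri32"; // sign-extended 32-bit immediate
      return "MOV64ri";     // otherwise a full 64-bit immediate is required
    }

    int main() {
      assert(std::string(movOpcodeFor(0x2000)) == "MOV32ri");
      assert(std::string(movOpcodeFor(0x100000000ULL)) == "MOV64ri");
      return 0;
    }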
1509
1510
bool X86FrameLowering::canUseLEAForSPInEpilogue(
1511
21.0k
    const MachineFunction &MF) const {
1512
21.0k
  // We can't use LEA instructions for adjusting the stack pointer if we don't
1513
21.0k
  // have a frame pointer in the Win64 ABI.  Only ADD instructions may be used
1514
21.0k
  // to deallocate the stack.
1515
21.0k
  // This means that we can use LEA for SP in two situations:
1516
21.0k
  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
1517
21.0k
  // 2. We *have* a frame pointer which means we are permitted to use LEA.
1518
21.0k
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
1519
21.0k
}
1520
1521
163k
static bool isFuncletReturnInstr(MachineInstr &MI) {
1522
163k
  switch (MI.getOpcode()) {
1523
163k
  case X86::CATCHRET:
1524
138
  case X86::CLEANUPRET:
1525
138
    return true;
1526
163k
  default:
1527
163k
    return false;
1528
0
  }
1529
0
  llvm_unreachable("impossible");
1530
0
}
1531
1532
// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
1533
// stack. It holds a pointer to the bottom of the root function frame.  The
1534
// establisher frame pointer passed to a nested funclet may point to the
1535
// (mostly empty) frame of its parent funclet, but it will need to find
1536
// the frame of the root function to access locals.  To facilitate this,
1537
// every funclet copies the pointer to the bottom of the root function
1538
// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
1539
// same offset for the PSPSym in the root function frame that's used in the
1540
// funclets' frames allows each funclet to dynamically accept any ancestor
1541
// frame as its establisher argument (the runtime doesn't guarantee the
1542
// immediate parent for some reason lost to history), and also allows the GC,
1543
// which uses the PSPSym for some bookkeeping, to find it in any funclet's
1544
// frame with only a single offset reported for the entire method.
1545
unsigned
1546
59
X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
1547
59
  const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
1548
59
  unsigned SPReg;
1549
59
  int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
1550
59
                                              /*IgnoreSPUpdates*/ true);
1551
59
  assert(Offset >= 0 && SPReg == TRI->getStackRegister());
1552
59
  return static_cast<unsigned>(Offset);
1553
59
}
1554
1555
unsigned
1556
245
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
1557
245
  // This is the size of the pushed CSRs.
1558
245
  unsigned CSSize =
1559
245
      MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
1560
245
  // This is the amount of stack a funclet needs to allocate.
1561
245
  unsigned UsedSize;
1562
245
  EHPersonality Personality =
1563
245
      classifyEHPersonality(MF.getFunction().getPersonalityFn());
1564
245
  if (Personality == EHPersonality::CoreCLR) {
1565
33
    // CLR funclets need to hold enough space to include the PSPSym, at the
1566
33
    // same offset from the stack pointer (immediately after the prolog) as it
1567
33
    // resides in the main function.
1568
33
    UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
1569
212
  } else {
1570
212
    // Other funclets just need enough stack for outgoing call arguments.
1571
212
    UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
1572
212
  }
1573
245
  // RBP is not included in the callee saved register block. After pushing RBP,
1574
245
  // everything is 16 byte aligned. Everything we allocate before an outgoing
1575
245
  // call must also be 16 byte aligned.
1576
245
  unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
1577
245
  // Subtract out the size of the callee saved registers. This is how much stack
1578
245
  // each funclet will allocate.
1579
245
  return FrameSizeMinusRBP - CSSize;
1580
245
}
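For a concrete feel of the computation just above, here is a small sketch with hypothetical values (not taken from the coverage data): 8 bytes of pushed CSRs plus 36 bytes of outgoing-call stack round up to 48, so the funclet itself allocates 40 bytes, keeping allocations 16-byte aligned after the RBP push.

    #include <cassert>
    #include <cstdint>

    // Minimal sketch of getWinEHFuncletFrameSize's math; alignTo is re-derived
    // locally and CSSize/UsedSize are made-up example values.
    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align; // round up to a multiple
    }

    int main() {
      unsigned CSSize = 8;    // hypothetical pushed callee-saved register bytes
      unsigned UsedSize = 36; // hypothetical outgoing call-argument stack
      unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, 16); // 48
      assert(FrameSizeMinusRBP - CSSize == 40); // bytes the funclet allocates
      return 0;
    }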
1581
1582
141k
static bool isTailCallOpcode(unsigned Opc) {
1583
141k
    return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
1584
141k
        Opc == X86::TCRETURNmi ||
1585
141k
        Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
1586
141k
        Opc == X86::TCRETURNmi64;
1587
141k
}
1588
1589
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
1590
141k
                                    MachineBasicBlock &MBB) const {
1591
141k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
1592
141k
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1593
141k
  MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
1594
141k
  MachineBasicBlock::iterator MBBI = Terminator;
1595
141k
  DebugLoc DL;
1596
141k
  if (MBBI != MBB.end())
1597
141k
    DL = MBBI->getDebugLoc();
1598
141k
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
1599
141k
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
1600
141k
  unsigned FramePtr = TRI->getFrameRegister(MF);
1601
141k
  unsigned MachineFramePtr =
1602
141k
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
1603
141k
1604
141k
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1605
141k
  bool NeedsWin64CFI =
1606
141k
      IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
1607
141k
  bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
1608
141k
1609
141k
  // Get the number of bytes to allocate from the FrameInfo.
1610
141k
  uint64_t StackSize = MFI.getStackSize();
1611
141k
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
1612
141k
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1613
141k
  bool HasFP = hasFP(MF);
1614
141k
  uint64_t NumBytes = 0;
1615
141k
1616
141k
  bool NeedsDwarfCFI =
1617
141k
      (!MF.getTarget().getTargetTriple().isOSDarwin() &&
1618
141k
       !MF.getTarget().getTargetTriple().isOSWindows()) &&
1619
141k
      (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
1620
141k
1621
141k
  if (IsFunclet) {
1622
96
    assert(HasFP && "EH funclets without FP not yet implemented");
1623
96
    NumBytes = getWinEHFuncletFrameSize(MF);
1624
141k
  } else if (HasFP) {
1625
26.6k
    // Calculate required stack adjustment.
1626
26.6k
    uint64_t FrameSize = StackSize - SlotSize;
1627
26.6k
    NumBytes = FrameSize - CSSize;
1628
26.6k
1629
26.6k
    // Callee-saved registers were pushed on stack before the stack was
1630
26.6k
    // realigned.
1631
26.6k
    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
1632
1.42k
      NumBytes = alignTo(FrameSize, MaxAlign);
1633
115k
  } else {
1634
115k
    NumBytes = StackSize - CSSize;
1635
115k
  }
1636
141k
  uint64_t SEHStackAllocAmt = NumBytes;
1637
141k
1638
141k
  if (HasFP) {
1639
26.7k
    // Pop EBP.
1640
26.7k
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
1641
26.7k
            MachineFramePtr)
1642
26.7k
        .setMIFlag(MachineInstr::FrameDestroy);
1643
26.7k
    if (NeedsDwarfCFI) {
1644
717
      unsigned DwarfStackPtr =
1645
717
          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
1646
717
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
1647
717
                                  nullptr, DwarfStackPtr, -SlotSize));
1648
717
      --MBBI;
1649
717
    }
1650
26.7k
  }
1651
141k
1652
141k
  MachineBasicBlock::iterator FirstCSPop = MBBI;
1653
141k
  // Skip the callee-saved pop instructions.
1654
236k
  while (MBBI != MBB.begin()) {
1655
224k
    MachineBasicBlock::iterator PI = std::prev(MBBI);
1656
224k
    unsigned Opc = PI->getOpcode();
1657
224k
1658
224k
    if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
1659
224k
      if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
1660
224k
          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
1661
130k
        break;
1662
94.4k
      FirstCSPop = PI;
1663
94.4k
    }
1664
224k
1665
224k
    --MBBI;
1666
94.4k
  }
1667
141k
  MBBI = FirstCSPop;
1668
141k
1669
141k
  if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
1670
63
    emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
1671
141k
1672
141k
  if (MBBI != MBB.end())
1673
141k
    DL = MBBI->getDebugLoc();
1674
141k
1675
141k
  // If there is an ADD32ri or SUB32ri of ESP immediately before this
1676
141k
  // instruction, merge the two instructions.
1677
141k
  if (NumBytes || MFI.hasVarSizedObjects())
1678
24.5k
    NumBytes += mergeSPUpdates(MBB, MBBI, true);
1679
141k
1680
141k
  // If dynamic alloca is used, then reset esp to point to the last callee-saved
1681
141k
  // slot before popping them off! Same applies for the case, when stack was
1682
141k
  // realigned. Don't do this if this was a funclet epilogue, since the funclets
1683
141k
  // will not do realignment or dynamic stack allocation.
1684
141k
  if ((TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) &&
1685
141k
      !IsFunclet) {
1686
1.65k
    if (TRI->needsStackRealignment(MF))
1687
1.45k
      MBBI = FirstCSPop;
1688
1.65k
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
1689
1.65k
    uint64_t LEAAmount =
1690
1.65k
        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
1691
1.65k
1692
1.65k
    // There are only two legal forms of epilogue:
1693
1.65k
    // - add SEHAllocationSize, %rsp
1694
1.65k
    // - lea SEHAllocationSize(%FramePtr), %rsp
1695
1.65k
    //
1696
1.65k
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
1697
1.65k
    // However, we may use this sequence if we have a frame pointer because the
1698
1.65k
    // effects of the prologue can safely be undone.
1699
1.65k
    if (LEAAmount != 0) {
1700
438
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
1701
438
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
1702
438
                   FramePtr, false, LEAAmount);
1703
438
      --MBBI;
1704
1.21k
    } else {
1705
1.21k
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
1706
1.21k
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
1707
1.21k
        .addReg(FramePtr);
1708
1.21k
      --MBBI;
1709
1.21k
    }
1710
140k
  } else if (NumBytes) {
1711
22.8k
    // Adjust stack pointer back: ESP += numbytes.
1712
22.8k
    emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
1713
22.8k
    if (!hasFP(MF) && NeedsDwarfCFI) {
1714
2.59k
      // Define the current CFA rule to use the provided offset.
1715
2.59k
      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
1716
2.59k
                                  nullptr, -CSSize - SlotSize));
1717
2.59k
    }
1718
22.8k
    --MBBI;
1719
22.8k
  }
1720
141k
1721
141k
  // Windows unwinder will not invoke function's exception handler if IP is
1722
141k
  // either in prologue or in epilogue.  This behavior causes a problem when a
1723
141k
  // call immediately precedes an epilogue, because the return address points
1724
141k
  // into the epilogue.  To cope with that, we insert an epilogue marker here,
1725
141k
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
1726
141k
  // final emitted code.
1727
141k
  if (NeedsWin64CFI && MF.hasWinCFI())
1728
642
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
1729
141k
1730
141k
  if (!hasFP(MF) && NeedsDwarfCFI) {
1731
66.2k
    MBBI = FirstCSPop;
1732
66.2k
    int64_t Offset = -CSSize - SlotSize;
1733
66.2k
    // Mark callee-saved pop instruction.
1734
66.2k
    // Define the current CFA rule to use the provided offset.
1735
135k
    while (MBBI != MBB.end()) {
1736
68.7k
      MachineBasicBlock::iterator PI = MBBI;
1737
68.7k
      unsigned Opc = PI->getOpcode();
1738
68.7k
      ++MBBI;
1739
68.7k
      if (Opc == X86::POP32r || Opc == X86::POP64r) {
1740
2.55k
        Offset += SlotSize;
1741
2.55k
        BuildCFI(MBB, MBBI, DL,
1742
2.55k
                 MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
1743
2.55k
      }
1744
68.7k
    }
1745
66.2k
  }
1746
141k
1747
141k
  if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
1748
134k
    // Add the return addr area delta back since we are not tail calling.
1749
134k
    int Offset = -1 * X86FI->getTCReturnAddrDelta();
1750
134k
    assert(Offset >= 0 && "TCDelta should never be positive");
1751
134k
    if (Offset) {
1752
0
      // Check for possible merge with preceding ADD instruction.
1753
0
      Offset += mergeSPUpdates(MBB, Terminator, true);
1754
0
      emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
1755
0
    }
1756
134k
  }
1757
141k
}
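To illustrate the frame-pointer branch of the epilogue's size computation above (illustrative values only, not coverage data): a 64-byte frame with 16 bytes of pushed CSRs leaves 40 bytes of locals to deallocate before the CSR pops and the final pop of RBP.

    #include <cassert>
    #include <cstdint>

    // Sketch of emitEpilogue's NumBytes computation for the HasFP case; the
    // StackSize and CSSize values are hypothetical.
    int main() {
      uint64_t StackSize = 64; // MFI.getStackSize()
      uint64_t SlotSize = 8;   // the slot holding the saved RBP
      uint64_t CSSize = 16;    // X86FI->getCalleeSavedFrameSize()
      uint64_t FrameSize = StackSize - SlotSize; // 56
      uint64_t NumBytes = FrameSize - CSSize;    // 40 bytes of locals
      assert(NumBytes == 40);
      return 0;
    }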
1758
1759
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1760
226k
                                             unsigned &FrameReg) const {
1761
226k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
1762
226k
1763
226k
  bool IsFixed = MFI.isFixedObjectIndex(FI);
1764
226k
  // We can't calculate offset from frame pointer if the stack is realigned,
1765
226k
  // so enforce usage of stack/base pointer.  The base pointer is used when we
1766
226k
  // have dynamic allocas in addition to dynamic realignment.
1767
226k
  if (TRI->hasBasePointer(MF))
1768
394
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
1769
226k
  else if (TRI->needsStackRealignment(MF))
1770
17.3k
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
1771
208k
  else
1772
208k
    FrameReg = TRI->getFrameRegister(MF);
1773
226k
1774
226k
  // Offset will hold the offset from the stack pointer at function entry to the
1775
226k
  // object.
1776
226k
  // We need to factor in additional offsets applied during the prologue to the
1777
226k
  // frame, base, and stack pointer depending on which is used.
1778
226k
  int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
1779
226k
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1780
226k
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1781
226k
  uint64_t StackSize = MFI.getStackSize();
1782
226k
  bool HasFP = hasFP(MF);
1783
226k
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1784
226k
  int64_t FPDelta = 0;
1785
226k
1786
226k
  // In an x86 interrupt, remove the offset we added to account for the return
1787
226k
  // address from any stack object allocated in the caller's frame. Interrupts
1788
226k
  // do not have a standard return address. Fixed objects in the current frame,
1789
226k
  // such as SSE register spills, should not get this treatment.
1790
226k
  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
1791
226k
      Offset >= 0) {
1792
34
    Offset += getOffsetOfLocalArea();
1793
34
  }
1794
226k
1795
226k
  if (IsWin64Prologue) {
1796
2.50k
    assert(!MFI.hasCalls() || (StackSize % 16) == 8);
1797
2.50k
1798
2.50k
    // Calculate required stack adjustment.
1799
2.50k
    uint64_t FrameSize = StackSize - SlotSize;
1800
2.50k
    // If required, include space for extra hidden slot for stashing base pointer.
1801
2.50k
    if (X86FI->getRestoreBasePointer())
1802
0
      FrameSize += SlotSize;
1803
2.50k
    uint64_t NumBytes = FrameSize - CSSize;
1804
2.50k
1805
2.50k
    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
1806
2.50k
    if (FI && FI == X86FI->getFAIndex())
1807
4
      return -SEHFrameOffset;
1808
2.49k
1809
2.49k
    // FPDelta is the offset from the "traditional" FP location of the old base
1810
2.49k
    // pointer followed by return address and the location required by the
1811
2.49k
    // restricted Win64 prologue.
1812
2.49k
    // Add FPDelta to all offsets below that go through the frame pointer.
1813
2.49k
    FPDelta = FrameSize - SEHFrameOffset;
1814
2.49k
    assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
1815
2.49k
           "FPDelta isn't aligned per the Win64 ABI!");
1816
2.49k
  }
1817
226k
1818
226k
1819
226k
  
  if (TRI->hasBasePointer(MF)) {
1820
394
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
1821
394
    if (FI < 0) {
1822
87
      // Skip the saved EBP.
1823
87
      return Offset + SlotSize + FPDelta;
1824
307
    } else {
1825
307
      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
1826
307
      return Offset + StackSize;
1827
307
    }
1828
226k
  } else if (TRI->needsStackRealignment(MF)) {
1829
17.3k
    if (FI < 0) {
1830
4.03k
      // Skip the saved EBP.
1831
4.03k
      return Offset + SlotSize + FPDelta;
1832
13.2k
    } else {
1833
13.2k
      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
1834
13.2k
      return Offset + StackSize;
1835
13.2k
    }
1836
208k
    // FIXME: Support tail calls
1837
208k
  } else {
1838
208k
    if (!HasFP)
1839
66.4k
      return Offset + StackSize;
1840
142k
1841
142k
    // Skip the saved EBP.
1842
142k
    Offset += SlotSize;
1843
142k
1844
142k
    // Skip the RETADDR move area
1845
142k
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1846
142k
    if (TailCallReturnAddrDelta < 0)
1847
0
      Offset -= TailCallReturnAddrDelta;
1848
142k
  }
1849
226k
1850
226k
  
  return Offset + FPDelta;
1851
226k
}
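Tracing the plain frame-pointer path of the function above with made-up numbers (x86-64: SlotSize 8, local area offset -8): a local whose MFI object offset is -24 comes out at -8(%rbp), because skipping the saved RBP adds one slot.

    #include <cassert>

    // Sketch of getFrameIndexReference's non-realigned, frame-pointer path;
    // the object offset is a hypothetical example value.
    int main() {
      int SlotSize = 8;         // x86-64
      int ObjectOffset = -24;   // MFI.getObjectOffset(FI) for some local
      int LocalAreaOffset = -8; // getOffsetOfLocalArea() on x86-64
      int Offset = ObjectOffset - LocalAreaOffset; // -16
      Offset += SlotSize;       // skip the saved RBP pushed in the prologue
      assert(Offset == -8);     // i.e. the local lives at -8(%rbp)
      return 0;
    }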
1852
1853
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
1854
                                               int FI, unsigned &FrameReg,
1855
382
                                               int Adjustment) const {
1856
382
  const MachineFrameInfo &MFI = MF.getFrameInfo();
1857
382
  FrameReg = TRI->getStackRegister();
1858
382
  return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
1859
382
}
1860
1861
int
1862
X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
1863
                                                 int FI, unsigned &FrameReg,
1864
377
                                                 bool IgnoreSPUpdates) const {
1865
377
1866
377
  const MachineFrameInfo &MFI = MF.getFrameInfo();
1867
377
  // Does not include any dynamic realign.
1868
377
  const uint64_t StackSize = MFI.getStackSize();
1869
377
  // LLVM arranges the stack as follows:
1870
377
  //   ...
1871
377
  //   ARG2
1872
377
  //   ARG1
1873
377
  //   RETADDR
1874
377
  //   PUSH RBP   <-- RBP points here
1875
377
  //   PUSH CSRs
1876
377
  //   ~~~~~~~    <-- possible stack realignment (non-win64)
1877
377
  //   ...
1878
377
  //   STACK OBJECTS
1879
377
  //   ...        <-- RSP after prologue points here
1880
377
  //   ~~~~~~~    <-- possible stack realignment (win64)
1881
377
  //
1882
377
  // if (hasVarSizedObjects()):
1883
377
  //   ...        <-- "base pointer" (ESI/RBX) points here
1884
377
  //   DYNAMIC ALLOCAS
1885
377
  //   ...        <-- RSP points here
1886
377
  //
1887
377
  // Case 1: In the simple case of no stack realignment and no dynamic
1888
377
  // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
1889
377
  // with fixed offsets from RSP.
1890
377
  //
1891
377
  // Case 2: In the case of stack realignment with no dynamic allocas, fixed
1892
377
  // stack objects are addressed with RBP and regular stack objects with RSP.
1893
377
  //
1894
377
  // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
1895
377
  // to address stack arguments for outgoing calls and nothing else. The "base
1896
377
  // pointer" points to local variables, and RBP points to fixed objects.
1897
377
  //
1898
377
  // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
1899
377
  // answer we give is relative to the SP after the prologue, and not the
1900
377
  // SP in the middle of the function.
1901
377
1902
377
  if (MFI.isFixedObjectIndex(FI) && TRI->needsStackRealignment(MF) &&
1903
377
      !STI.isTargetWin64())
1904
0
    return getFrameIndexReference(MF, FI, FrameReg);
1905
377
1906
377
  // If !hasReservedCallFrame the function might have SP adjustment in the
1907
377
  // body.  So, even though the offset is statically known, it depends on where
1908
377
  // we are in the function.
1909
377
  if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
1910
4
    return getFrameIndexReference(MF, FI, FrameReg);
1911
373
1912
373
  // We don't handle tail calls, and shouldn't be seeing them either.
1913
373
  assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
1914
373
         "we don't handle this case!");
1915
373
1916
373
  // This is how the math works out:
1917
373
  //
1918
373
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
1919
373
  //  one word (eight bytes).  Obj0 is the stack slot we're trying to
1920
373
  //  get to.
1921
373
  //
1922
373
  //    ----------------------------------
1923
373
  //    | BP | Obj0 | Obj1 | ... | ObjN |
1924
373
  //    ----------------------------------
1925
373
  //    ^    ^      ^                   ^
1926
373
  //    A    B      C                   E
1927
373
  //
1928
373
  // A is the incoming stack pointer.
1929
373
  // (B - A) is the local area offset (-8 for x86-64) [1]
1930
373
  // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
1931
373
  //
1932
373
  // |(E - B)| is the StackSize (absolute value, positive).  For a
1933
373
  // stack that grows down, this works out to be (B - E). [3]
1934
373
  //
1935
373
  // E is also the value of %rsp after stack has been set up, and we
1936
373
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
1937
373
  // (C - E) == (C - A) - (B - A) + (B - E)
1938
373
  //            { Using [1], [2] and [3] above }
1939
373
  //         == getObjectOffset - LocalAreaOffset + StackSize
1940
373
1941
373
  return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
1942
373
}
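Plugging illustrative numbers into the identity just derived (x86-64, local area offset -8): an object at MFI offset -24 in a function whose prologue allocates 48 bytes sits 32 bytes above the post-prologue RSP.

    #include <cassert>

    // Sketch of the (C - E) == getObjectOffset - LocalAreaOffset + StackSize
    // identity from the comment above; all values are hypothetical.
    int main() {
      int ObjectOffset = -24;   // (C - A): MFI.getObjectOffset(FI)
      int LocalAreaOffset = -8; // (B - A): return-address slot on x86-64
      int StackSize = 48;       // (B - E): bytes the prologue allocated
      int SPRelative = ObjectOffset - LocalAreaOffset + StackSize; // (C - E)
      assert(SPRelative == 32); // add 32 to %rsp to reach the object
      return 0;
    }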
1943
1944
bool X86FrameLowering::assignCalleeSavedSpillSlots(
1945
    MachineFunction &MF, const TargetRegisterInfo *TRI,
1946
137k
    std::vector<CalleeSavedInfo> &CSI) const {
1947
137k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1948
137k
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1949
137k
1950
137k
  unsigned CalleeSavedFrameSize = 0;
1951
137k
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
1952
137k
1953
137k
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1954
137k
1955
137k
  if (TailCallReturnAddrDelta < 0) {
1956
8
    // create RETURNADDR area
1957
8
    //   arg
1958
8
    //   arg
1959
8
    //   RETADDR
1960
8
    //   { ...
1961
8
    //     RETADDR area
1962
8
    //     ...
1963
8
    //   }
1964
8
    //   [EBP]
1965
8
    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
1966
8
                           TailCallReturnAddrDelta - SlotSize, true);
1967
8
  }
1968
137k
1969
137k
  // Spill the BasePtr if it's used.
1970
137k
  if (this->TRI->hasBasePointer(MF)) {
1971
74
    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
1972
74
    if (MF.hasEHFunclets()) {
1973
5
      int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
1974
5
      X86FI->setHasSEHFramePtrSave(true);
1975
5
      X86FI->setSEHFramePtrSaveIndex(FI);
1976
5
    }
1977
74
  }
1978
137k
1979
137k
  if (hasFP(MF)) {
1980
23.6k
    // emitPrologue always spills frame register the first thing.
1981
23.6k
    SpillSlotOffset -= SlotSize;
1982
23.6k
    MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1983
23.6k
1984
23.6k
    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
1985
23.6k
    // the frame register, we can delete it from CSI list and not have to worry
1986
23.6k
    // about avoiding it later.
1987
23.6k
    unsigned FPReg = TRI->getFrameRegister(MF);
1988
72.2k
    for (unsigned i = 0; i < CSI.size(); ++i) {
1989
48.7k
      if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
1990
81
        CSI.erase(CSI.begin() + i);
1991
81
        break;
1992
81
      }
1993
48.7k
    }
1994
23.6k
  }
1995
137k
1996
137k
  // Assign slots for GPRs. It increases frame size.
1997
195k
  for (unsigned i = CSI.size(); i != 0; --i) {
1998
57.4k
    unsigned Reg = CSI[i - 1].getReg();
1999
57.4k
2000
57.4k
    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2001
755
      continue;
2002
56.7k
2003
56.7k
    SpillSlotOffset -= SlotSize;
2004
56.7k
    CalleeSavedFrameSize += SlotSize;
2005
56.7k
2006
56.7k
    int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2007
56.7k
    CSI[i - 1].setFrameIdx(SlotIndex);
2008
56.7k
  }
2009
137k
2010
137k
  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2011
137k
  MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2012
137k
2013
137k
  // Assign slots for XMMs.
2014
195k
  for (unsigned i = CSI.size(); i != 0; --i) {
2015
57.4k
    unsigned Reg = CSI[i - 1].getReg();
2016
57.4k
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2017
56.7k
      continue;
2018
755
2019
755
    // If this is k-register make sure we lookup via the largest legal type.
2020
755
    MVT VT = MVT::Other;
2021
755
    if (X86::VK16RegClass.contains(Reg))
2022
48
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2023
755
2024
755
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2025
755
    unsigned Size = TRI->getSpillSize(*RC);
2026
755
    unsigned Align = TRI->getSpillAlignment(*RC);
2027
755
    // ensure alignment
2028
755
    SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
2029
755
    // spill into slot
2030
755
    SpillSlotOffset -= Size;
2031
755
    int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2032
755
    CSI[i - 1].setFrameIdx(SlotIndex);
2033
755
    MFI.ensureMaxAlignment(Align);
2034
755
  }
2035
137k
2036
137k
  return true;
2037
137k
}
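To make the slot walk above concrete (a hypothetical x86-64 function with a frame pointer, no tail-call delta, and two GPR CSRs): the saved RBP fixed object lands at offset -16, the two CSR slots at -24 and -32, and CalleeSavedFrameSize ends up 16.

    #include <cassert>

    // Sketch of assignCalleeSavedSpillSlots' offset bookkeeping; the CSR count
    // and subtarget parameters are made-up example values.
    int main() {
      const int SlotSize = 8;      // x86-64
      int SpillSlotOffset = -8;    // getOffsetOfLocalArea() + TCReturnAddrDelta(0)
      SpillSlotOffset -= SlotSize; // saved RBP: fixed spill object at -16
      unsigned CalleeSavedFrameSize = 0;
      for (int i = 0; i < 2; ++i) { // two GPR CSRs, e.g. rbx then r14
        SpillSlotOffset -= SlotSize; // -24, then -32
        CalleeSavedFrameSize += SlotSize;
      }
      assert(SpillSlotOffset == -32 && CalleeSavedFrameSize == 16);
      return 0;
    }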
2038
2039
bool X86FrameLowering::spillCalleeSavedRegisters(
2040
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2041
    const std::vector<CalleeSavedInfo> &CSI,
2042
18.3k
    const TargetRegisterInfo *TRI) const {
2043
18.3k
  DebugLoc DL = MBB.findDebugLoc(MI);
2044
18.3k
2045
18.3k
  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2046
18.3k
  // for us, and there are no XMM CSRs on Win32.
2047
18.3k
  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2048
32
    return true;
2049
18.3k
2050
18.3k
  // Push GPRs. It increases frame size.
2051
18.3k
  const MachineFunction &MF = *MBB.getParent();
2052
18.3k
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2053
75.8k
  for (unsigned i = CSI.size(); i != 0; --i) {
2054
57.5k
    unsigned Reg = CSI[i - 1].getReg();
2055
57.5k
2056
57.5k
    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2057
756
      continue;
2058
56.7k
2059
56.7k
    const MachineRegisterInfo &MRI = MF.getRegInfo();
2060
56.7k
    bool isLiveIn = MRI.isLiveIn(Reg);
2061
56.7k
    if (!isLiveIn)
2062
56.7k
      MBB.addLiveIn(Reg);
2063
56.7k
2064
56.7k
    // Decide whether we can add a kill flag to the use.
2065
56.7k
    bool CanKill = !isLiveIn;
2066
56.7k
    // Check if any subregister is live-in
2067
56.7k
    if (CanKill) {
2068
510k
      for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
2069
453k
        if (MRI.isLiveIn(*AReg)) {
2070
1
          CanKill = false;
2071
1
          break;
2072
1
        }
2073
453k
      }
2074
56.7k
    }
2075
56.7k
2076
56.7k
    // Do not set a kill flag on values that are also marked as live-in. This
2077
56.7k
    // happens with the @llvm.returnaddress intrinsic and with arguments
2078
56.7k
    // passed in callee saved registers.
2079
56.7k
    // Omitting the kill flags is conservatively correct even if the live-in
2080
56.7k
    // is not used after all.
2081
56.7k
    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
2082
56.7k
      .setMIFlag(MachineInstr::FrameSetup);
2083
56.7k
  }
2084
18.3k
2085
18.3k
  // Spill the XMM regs; X86 has no push/pop instructions for XMM registers.
2086
18.3k
  // They are instead stored to spill slots in the stack frame.
2087
75.8k
  for (unsigned i = CSI.size(); i != 0; --i) {
2088
57.5k
    unsigned Reg = CSI[i-1].getReg();
2089
57.5k
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2090
56.7k
      continue;
2091
756
2092
756
    // If this is k-register make sure we lookup via the largest legal type.
2093
756
    MVT VT = MVT::Other;
2094
756
    if (X86::VK16RegClass.contains(Reg))
2095
48
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2096
756
2097
756
    // Add the callee-saved register as live-in. It's killed at the spill.
2098
756
    MBB.addLiveIn(Reg);
2099
756
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2100
756
2101
756
    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
2102
756
                            TRI);
2103
756
    --MI;
2104
756
    MI->setFlag(MachineInstr::FrameSetup);
2105
756
    ++MI;
2106
756
  }
2107
18.3k
2108
18.3k
  return true;
2109
18.3k
}
2110
2111
void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
2112
                                               MachineBasicBlock::iterator MBBI,
2113
63
                                               MachineInstr *CatchRet) const {
2114
63
  // SEH shouldn't use catchret.
2115
63
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2116
63
             MBB.getParent()->getFunction().getPersonalityFn())) &&
2117
63
         "SEH should not use CATCHRET");
2118
63
  DebugLoc DL = CatchRet->getDebugLoc();
2119
63
  MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
2120
63
2121
63
  // Fill EAX/RAX with the address of the target block.
2122
63
  if (STI.is64Bit()) {
2123
43
    // LEA64r CatchRetTarget(%rip), %rax
2124
43
    BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
2125
43
        .addReg(X86::RIP)
2126
43
        .addImm(0)
2127
43
        .addReg(0)
2128
43
        .addMBB(CatchRetTarget)
2129
43
        .addReg(0);
2130
43
  } else {
2131
20
    // MOV32ri $CatchRetTarget, %eax
2132
20
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2133
20
        .addMBB(CatchRetTarget);
2134
20
  }
2135
63
2136
63
  // Record that we've taken the address of CatchRetTarget and no longer just
2137
63
  // reference it in a terminator.
2138
63
  CatchRetTarget->setHasAddressTaken();
2139
63
}
2140
2141
bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2142
                                               MachineBasicBlock::iterator MI,
2143
                                          std::vector<CalleeSavedInfo> &CSI,
2144
21.4k
                                          const TargetRegisterInfo *TRI) const {
2145
21.4k
  if (CSI.empty())
2146
0
    return false;
2147
21.4k
2148
21.4k
  if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
2149
42
    // Don't restore CSRs in 32-bit EH funclets. Matches
2150
42
    // spillCalleeSavedRegisters.
2151
42
    if (STI.is32Bit())
2152
31
      return true;
2153
11
    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
2154
11
    // funclets. emitEpilogue transforms these to normal jumps.
2155
11
    if (MI->getOpcode() == X86::CATCHRET) {
2156
6
      const Function &F = MBB.getParent()->getFunction();
2157
6
      bool IsSEH = isAsynchronousEHPersonality(
2158
6
          classifyEHPersonality(F.getPersonalityFn()));
2159
6
      if (IsSEH)
2160
0
        return true;
2161
21.4k
    }
2162
11
  }
2163
21.4k
2164
21.4k
  DebugLoc DL = MBB.findDebugLoc(MI);
2165
21.4k
2166
21.4k
  // Reload XMMs from stack frame.
2167
89.8k
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2168
68.4k
    unsigned Reg = CSI[i].getReg();
2169
68.4k
    if (X86::GR64RegClass.contains(Reg) ||
2170
68.4k
        X86::GR32RegClass.contains(Reg))
2171
67.6k
      continue;
2172
776
2173
776
    // If this is k-register make sure we lookup via the largest legal type.
2174
776
    MVT VT = MVT::Other;
2175
776
    if (X86::VK16RegClass.contains(Reg))
2176
48
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2177
776
2178
776
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2179
776
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
2180
776
  }
2181
21.4k
2182
21.4k
  // POP GPRs.
2183
21.4k
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
2184
89.8k
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2185
68.4k
    unsigned Reg = CSI[i].getReg();
2186
68.4k
    if (!X86::GR64RegClass.contains(Reg) &&
2187
68.4k
        !X86::GR32RegClass.contains(Reg))
2188
776
      continue;
2189
67.6k
2190
67.6k
    BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
2191
67.6k
        .setMIFlag(MachineInstr::FrameDestroy);
2192
67.6k
  }
2193
21.4k
  return true;
2194
21.4k
}
2195
2196
void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
2197
                                            BitVector &SavedRegs,
2198
162k
                                            RegScavenger *RS) const {
2199
162k
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2200
162k
2201
162k
  // Spill the BasePtr if it's used.
2202
162k
  if (TRI->hasBasePointer(MF)){
2203
84
    unsigned BasePtr = TRI->getBaseRegister();
2204
84
    if (STI.isTarget64BitILP32())
2205
3
      BasePtr = getX86SubSuperRegister(BasePtr, 64);
2206
84
    SavedRegs.set(BasePtr);
2207
84
  }
2208
162k
}
2209
2210
static bool
2211
201
HasNestArgument(const MachineFunction *MF) {
2212
201
  const Function &F = MF->getFunction();
2213
201
  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
2214
279
       I != E; I++) {
2215
101
    if (I->hasNestAttr() && !I->use_empty())
2216
23
      return true;
2217
101
  }
2218
201
  
return false178
;
2219
201
}
2220
2221
/// GetScratchRegister - Get a temp register for performing work in the
2222
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
2223
/// and the properties of the function either one or two registers will be
2224
/// needed. Set primary to true for the first register, false for the second.
2225
static unsigned
2226
218
GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
2227
218
  CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
2228
218
2229
218
  // Erlang stuff.
2230
218
  if (CallingConvention == CallingConv::HiPE) {
2231
2
    if (Is64Bit)
2232
1
      return Primary ? X86::R14 : X86::R13;
2233
1
    else
2234
1
      return Primary ? X86::EBX : X86::EDI;
2235
216
  }
2236
216
2237
216
  if (Is64Bit) {
2238
124
    if (IsLP64)
2239
104
      return Primary ? X86::R11 : X86::R12;
2240
20
    else
2241
20
      return Primary ? X86::R11D : X86::R12D;
2242
92
  }
2243
92
2244
92
  bool IsNested = HasNestArgument(&MF);
2245
92
2246
92
  if (CallingConvention == CallingConv::X86_FastCall ||
2247
92
      CallingConvention == CallingConv::Fast) {
2248
30
    if (IsNested)
2249
0
      report_fatal_error("Segmented stacks does not support fastcall with "
2250
0
                         "nested function.");
2251
30
    return Primary ? X86::EAX : X86::ECX;
2252
30
  }
2253
62
  if (IsNested)
2254
10
    return Primary ? X86::EDX : X86::EAX;
2255
52
  return Primary ? X86::ECX : X86::EAX;
2256
52
}
2257
2258
// The stack limit in the TCB is set to this many bytes above the actual stack
2259
// limit.
2260
static const uint64_t kSplitStackAvailable = 256;
2261
2262
void X86FrameLowering::adjustForSegmentedStacks(
2263
200
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2264
200
  MachineFrameInfo &MFI = MF.getFrameInfo();
2265
200
  uint64_t StackSize;
2266
200
  unsigned TlsReg, TlsOffset;
2267
200
  DebugLoc DL;
2268
200
2269
200
  // To support shrink-wrapping we would need to insert the new blocks
2270
200
  // at the right place and update the branches to PrologueMBB.
2271
200
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
2272
200
2273
200
  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2274
200
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
2275
200
         "Scratch register is live-in");
2276
200
2277
200
  if (MF.getFunction().isVarArg())
2278
0
    report_fatal_error("Segmented stacks do not support vararg functions.");
2279
200
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
2280
200
      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
2281
200
      !STI.isTargetDragonFly())
2282
1
    report_fatal_error("Segmented stacks not supported on this platform.");
2283
199
2284
199
  // Eventually StackSize will be calculated by a link-time pass, which will
2285
199
  // also decide whether checking code needs to be injected into this particular
2286
199
  // prologue.
2287
199
  StackSize = MFI.getStackSize();
2288
199
2289
199
  // Do not generate a prologue for leaf functions with a stack of size zero.
2290
199
  // For non-leaf functions we have to allow for the possibility that the
2291
199
  // call is to a non-split function, as in PR37807. This function could also
2292
199
  // take the address of a non-split function. When the linker tries to adjust
2293
199
  // its non-existent prologue, it would fail with an error. Mark the object
2294
199
  // file so that such failures are not errors. See this Go language bug-report
2295
199
  // https://go-review.googlesource.com/c/go/+/148819/
2296
199
  if (StackSize == 0 && !MFI.hasTailCall()) {
2297
23
    MF.getMMI().setHasNosplitStack(true);
2298
23
    return;
2299
23
  }
2300
176
2301
176
  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
2302
176
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
2303
176
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2304
176
  bool IsNested = false;
2305
176
2306
176
  // We need to know if the function has a nest argument only in 64 bit mode.
2307
176
  if (Is64Bit)
2308
109
    IsNested = HasNestArgument(&MF);
2309
176
2310
176
  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
2311
176
  // allocMBB needs to be last (terminating) instruction.
2312
176
2313
176
  for (const auto &LI : PrologueMBB.liveins()) {
2314
101
    allocMBB->addLiveIn(LI);
2315
101
    checkMBB->addLiveIn(LI);
2316
101
  }
2317
176
2318
176
  if (IsNested)
2319
13
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
2320
176
2321
176
  MF.push_front(allocMBB);
2322
176
  MF.push_front(checkMBB);
2323
176
2324
176
  // When the frame size is less than 256 we just compare the stack
2325
176
  // boundary directly to the value of the stack pointer, per gcc.
2326
176
  bool CompareStackPointer = StackSize < kSplitStackAvailable;
2327
176
2328
176
  // Read the limit off the current stacklet off the stack_guard location.
2329
176
  if (Is64Bit) {
2330
109
    if (STI.isTargetLinux()) {
2331
44
      TlsReg = X86::FS;
2332
44
      TlsOffset = IsLP64 ? 0x70 : 0x40;
2333
65
    } else if (STI.isTargetDarwin()) {
2334
17
      TlsReg = X86::GS;
2335
17
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
2336
48
    } else if (STI.isTargetWin64()) {
2337
16
      TlsReg = X86::GS;
2338
16
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
2339
32
    } else if (STI.isTargetFreeBSD()) {
2340
16
      TlsReg = X86::FS;
2341
16
      TlsOffset = 0x18;
2342
16
    } else if (STI.isTargetDragonFly()) {
2343
16
      TlsReg = X86::FS;
2344
16
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
2345
16
    } else {
2346
0
      report_fatal_error("Segmented stacks not supported on this platform.");
2347
0
    }
2348
109
2349
109
    if (CompareStackPointer)
2350
70
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
2351
39
    else
2352
39
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
2353
39
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
2354
109
2355
109
    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
2356
109
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
2357
109
  } else {
2358
67
    if (STI.isTargetLinux()) {
2359
18
      TlsReg = X86::GS;
2360
18
      TlsOffset = 0x30;
2361
49
    } else if (STI.isTargetDarwin()) {
2362
16
      TlsReg = X86::GS;
2363
16
      TlsOffset = 0x48 + 90*4;
2364
33
    } else if (STI.isTargetWin32()) {
2365
16
      TlsReg = X86::FS;
2366
16
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
2367
17
    } else if (STI.isTargetDragonFly()) {
2368
16
      TlsReg = X86::FS;
2369
16
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
2370
16
    } else if (STI.isTargetFreeBSD()) {
2371
1
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
2372
1
    } else {
2373
0
      report_fatal_error("Segmented stacks not supported on this platform.");
2374
0
    }
2375
66
2376
66
    if (CompareStackPointer)
2377
42
      ScratchReg = X86::ESP;
2378
24
    else
2379
24
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
2380
24
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
2381
66
2382
66
    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
2383
66
        STI.isTargetDragonFly()) {
2384
50
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
2385
50
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
2386
50
    } else 
if (16
STI.isTargetDarwin()16
) {
2387
16
2388
16
      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
2389
16
      unsigned ScratchReg2;
2390
16
      bool SaveScratch2;
2391
16
      if (CompareStackPointer) {
2392
10
        // The primary scratch register is available for holding the TLS offset.
2393
10
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2394
10
        SaveScratch2 = false;
2395
10
      } else {
2396
6
        // Need to use a second register to hold the TLS offset
2397
6
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
2398
6
2399
6
        // Unfortunately, with fastcc the second scratch register may hold an
2400
6
        // argument.
2401
6
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
2402
6
      }
2403
16
2404
16
      // If Scratch2 is live-in then it needs to be saved.
2405
16
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
2406
16
             "Scratch register is live-in and not saved");
2407
16
2408
16
      if (SaveScratch2)
2409
2
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
2410
2
          .addReg(ScratchReg2, RegState::Kill);
2411
16
2412
16
      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
2413
16
        .addImm(TlsOffset);
2414
16
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
2415
16
        .addReg(ScratchReg)
2416
16
        .addReg(ScratchReg2).addImm(1).addReg(0)
2417
16
        .addImm(0)
2418
16
        .addReg(TlsReg);
2419
16
2420
16
      if (SaveScratch2)
2421
2
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
2422
16
    }
2423
66
  }
2424
176
2425
176
  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
2426
176
  // It jumps to normal execution of the function body.
2427
176
  BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
2428
175
2429
175
  // On 32 bit we first push the arguments size and then the frame size. On 64
2430
175
  // bit, we pass the stack frame size in r10 and the argument size in r11.
2431
175
  if (Is64Bit) {
2432
109
    // Functions with nested arguments use R10, so it needs to be saved across
2433
109
    // the call to _morestack
2434
109
2435
109
    const unsigned RegAX = IsLP64 ? 
X86::RAX91
:
X86::EAX18
;
2436
109
    const unsigned Reg10 = IsLP64 ? 
X86::R1091
:
X86::R10D18
;
2437
109
    const unsigned Reg11 = IsLP64 ? 
X86::R1191
:
X86::R11D18
;
2438
109
    const unsigned MOVrr = IsLP64 ? 
X86::MOV64rr91
:
X86::MOV32rr18
;
2439
109
    const unsigned MOVri = IsLP64 ? 
X86::MOV64ri91
:
X86::MOV32ri18
;
2440
109
2441
109
    if (IsNested)
2442
13
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
2443
109
2444
109
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
2445
109
      .addImm(StackSize);
2446
109
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
2447
109
      .addImm(X86FI->getArgumentStackSize());
2448
109
  } else {
2449
66
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
2450
66
      .addImm(X86FI->getArgumentStackSize());
2451
66
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
2452
66
      .addImm(StackSize);
2453
66
  }
2454
175
2455
175
  // __morestack is in libgcc
2456
175
  if (Is64Bit && 
MF.getTarget().getCodeModel() == CodeModel::Large109
) {
2457
8
    // Under the large code model, we cannot assume that __morestack lives
2458
8
    // within 2^31 bytes of the call site, so we cannot use pc-relative
2459
8
    // addressing. We cannot perform the call via a temporary register,
2460
8
    // as the rax register may be used to store the static chain, and all
2461
8
    // other suitable registers may be either callee-save or used for
2462
8
    // parameter passing. We cannot use the stack at this point either
2463
8
    // because __morestack manipulates the stack directly.
2464
8
    //
2465
8
    // To avoid these issues, perform an indirect call via a read-only memory
2466
8
    // location containing the address.
2467
8
    //
2468
8
    // This solution is not perfect, as it assumes that the .rodata section
2469
8
    // is laid out within 2^31 bytes of each function body, but this seems
2470
8
    // to be sufficient for JIT.
2471
8
    // FIXME: Add retpoline support and remove the error here..
2472
8
    if (STI.useRetpolineIndirectCalls())
2473
0
      report_fatal_error("Emitting morestack calls on 64-bit with the large "
2474
0
                         "code model and retpoline not yet implemented.");
2475
8
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
2476
8
        .addReg(X86::RIP)
2477
8
        .addImm(0)
2478
8
        .addReg(0)
2479
8
        .addExternalSymbol("__morestack_addr")
2480
8
        .addReg(0);
2481
8
    MF.getMMI().setUsesMorestackAddr(true);
2482
167
  } else {
2483
167
    if (Is64Bit)
2484
101
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
2485
101
        .addExternalSymbol("__morestack");
2486
66
    else
2487
66
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
2488
66
        .addExternalSymbol("__morestack");
2489
167
  }
2490
175
2491
175
  if (IsNested)
2492
13
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
2493
162
  else
2494
162
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
2495
175
2496
175
  allocMBB->addSuccessor(&PrologueMBB);
2497
175
2498
175
  checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
2499
175
  checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
2500
175
2501
#ifdef EXPENSIVE_CHECKS
2502
  MF.verify();
2503
#endif
2504
}
2505
2506
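// For illustration, a minimal standalone sketch of the control flow the two
// blocks built above implement: checkMBB compares the (possibly offset) stack
// pointer against the per-thread stacklet limit, and allocMBB calls
// __morestack with the frame and argument sizes. Every name in this sketch is
// an illustrative stand-in, not an LLVM or libgcc API.
#include <cstdint>

namespace split_stack_sketch {
uint64_t TlsStackletLimit; // stand-in for the TLS slot, e.g. %fs:0x70 on LP64 Linux

inline void morestack(uint64_t /*StackSize*/, uint64_t /*ArgSize*/) {
  // Stand-in for __morestack, which allocates a new stacklet in libgcc.
}

void prologue(uint64_t SP, uint64_t StackSize, uint64_t ArgSize) {
  const uint64_t kSplitStackAvailable = 256;
  // Small frames compare SP directly; larger ones first subtract the frame
  // size, which is what the LEA emitted into checkMBB does.
  uint64_t Probe = StackSize < kSplitStackAvailable ? SP : SP - StackSize;
  if (Probe > TlsStackletLimit) // the JA (COND_A) to PrologueMBB
    return;                     // enough room: fall through to the body
  morestack(StackSize, ArgSize); // allocMBB: sizes in R10/R11 on 64-bit
}
} // namespace split_stack_sketch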
/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
/// to fields it needs, through a named metadata node "hipe.literals" containing
/// name-value pairs.
static unsigned getHiPELiteral(
    NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
  for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
    MDNode *Node = HiPELiteralsMD->getOperand(i);
    if (Node->getNumOperands() != 2) continue;
    MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
    ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
    if (!NodeName || !NodeVal) continue;
    ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
    if (ValConst && NodeName->getString() == LiteralName) {
      return ValConst->getZExtValue();
    }
  }

  report_fatal_error("HiPE literal " + LiteralName
                     + " required but not provided");
}
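// For illustration: the named metadata consulted above is conceptually a list
// of (name, integer) pairs. In IR it looks roughly like this (the values are
// illustrative, not real ERTS parameters):
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 84}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}
//
// A minimal sketch of the same fail-fast lookup over plain data:
#include <cstdio>
#include <cstdlib>
#include <cstring>

namespace hipe_literal_sketch {
struct Literal { const char *Name; unsigned Value; };

unsigned getLiteral(const Literal *Lits, int N, const char *Name) {
  for (int i = 0; i < N; ++i)
    if (std::strcmp(Lits[i].Name, Name) == 0)
      return Lits[i].Value;
  std::fprintf(stderr, "HiPE literal %s required but not provided\n", Name);
  std::abort(); // mirrors report_fatal_error above
}
} // namespace hipe_literal_sketch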
/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///       call inc_stack   # doubles the stack space
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  DebugLoc DL;

  // To support shrink-wrapping we would need to insert the new blocks
  // at the right place and update the branches to PrologueMBB.
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");

  // HiPE-specific values
  NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
    ->getNamedMetadata("hipe.literals");
  if (!HiPELiteralsMD)
    report_fatal_error(
        "Can't generate HiPE prologue without runtime parameters");
  const unsigned HipeLeafWords
    = getHiPELiteral(HiPELiteralsMD,
                     Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
                            MF.getFunction().arg_size() - CCRegisteredArgs : 0;
  unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI.hasCalls()) {
    unsigned MoreStackForCalls = 0;

    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (!MI.isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI.getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed size, then runtime
  // checks and calls to the "inc_stack_0" BIF should be inserted in the
  // assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    for (const auto &LI : PrologueMBB.liveins()) {
      stackCheckMBB->addLiveIn(LI);
      incStackMBB->addLiveIn(LI);
    }

    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg  = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
    } else {
      SPReg = X86::ESP;
      PReg  = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);

    // Create new MBB for IncStack:
    BuildMI(incStackMBB, DL, TII.get(CALLop)).
      addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);

    stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
    stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
    incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
    incStackMBB->addSuccessor(incStackMBB, {1, 100});
  }
#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
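// A minimal sketch of the sizing decision above, with the x86-64 ABI constants
// filled in; the helper, its inputs, and the HipeLeafWords default are all
// illustrative stand-ins.
namespace hipe_sizing_sketch {
bool needsStackCheck(unsigned FrameSize, unsigned NumArgs,
                     unsigned HipeLeafWords = 24 /* illustrative */) {
  const unsigned SlotSize = 8, CCRegisteredArgs = 6; // x86-64
  unsigned Guaranteed = HipeLeafWords * SlotSize;
  unsigned CallerStkArity =
      NumArgs > CCRegisteredArgs ? NumArgs - CCRegisteredArgs : 0;
  unsigned MaxStack = FrameSize + CallerStkArity * SlotSize + SlotSize;
  return MaxStack > Guaranteed; // if so, emit stackCheckMBB/incStackMBB
}
} // namespace hipe_sizing_sketch
// E.g. needsStackCheck(150, 8) is false (174 <= 192), while
// needsStackCheck(200, 8) is true (224 > 192).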
bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           const DebugLoc &DL,
                                           int Offset) const {

  if (Offset <= 0)
    return false;

  if (Offset % SlotSize)
    return false;

  int NumPops = Offset / SlotSize;
  // This is only worth it if we have at most 2 pops.
  if (NumPops != 1 && NumPops != 2)
    return false;

  // Handle only the trivial case where the adjustment directly follows
  // a call. This is the most common one, anyway.
  if (MBBI == MBB.begin())
    return false;
  MachineBasicBlock::iterator Prev = std::prev(MBBI);
  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
    return false;

  unsigned Regs[2];
  unsigned FoundRegs = 0;

  auto &MRI = MBB.getParent()->getRegInfo();
  auto RegMask = Prev->getOperand(1);

  auto &RegClass =
      Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
  // Try to find up to NumPops free registers.
  for (auto Candidate : RegClass) {

    // Poor man's liveness:
    // Since we're immediately after a call, any register that is clobbered
    // by the call and not defined by it can be considered dead.
    if (!RegMask.clobbersPhysReg(Candidate))
      continue;

    // Don't clobber reserved registers
    if (MRI.isReserved(Candidate))
      continue;

    bool IsDef = false;
    for (const MachineOperand &MO : Prev->implicit_operands()) {
      if (MO.isReg() && MO.isDef() &&
          TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
        IsDef = true;
        break;
      }
    }

    if (IsDef)
      continue;

    Regs[FoundRegs++] = Candidate;
    if (FoundRegs == (unsigned)NumPops)
      break;
  }

  if (FoundRegs == 0)
    return false;

  // If we found only one free register, but need two, reuse the same one twice.
  while (FoundRegs < (unsigned)NumPops)
    Regs[FoundRegs++] = Regs[0];

  for (int i = 0; i < NumPops; ++i)
    BuildMI(MBB, MBBI, DL,
            TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);

  return true;
}
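// The rewrite above is purely a code-size play for minsize functions: an
// "add esp, N" after a call is replaced by one or two single-byte POPs into
// registers the call is known to clobber. A minimal sketch of the gating
// logic (names illustrative):
namespace pop_adjust_sketch {
// Returns how many POPs to emit, or 0 when the plain ADD should be kept.
int popsFor(int Offset, int SlotSize) {
  if (Offset <= 0 || Offset % SlotSize != 0)
    return 0;
  int NumPops = Offset / SlotSize;
  return (NumPops == 1 || NumPops == 2) ? NumPops : 0; // only worth it up to 2
}
} // namespace pop_adjust_sketch
// E.g. popsFor(16, 8) == 2 on x86-64: "add rsp, 16" becomes two POPs, provided
// two dead, non-reserved registers can be found right after the call.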
MachineBasicBlock::iterator X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  bool reserveCallFrame = hasReservedCallFrame(MF);
  unsigned Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  DebugLoc DL = I->getDebugLoc();
  uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0;
  uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
  I = MBB.erase(I);
  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
    // adjcallstackup instruction into 'add ESP, <amt>'.

    // We need to keep the stack aligned properly.  To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    unsigned StackAlign = getStackAlignment();
    Amount = alignTo(Amount, StackAlign);

    MachineModuleInfo &MMI = MF.getMMI();
    const Function &F = MF.getFunction();
    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
    bool DwarfCFI = !WindowsCFI &&
                    (MMI.hasDebugInfo() || F.needsUnwindTableEntry());

    // If we have any exception handlers in this function, and we adjust
    // the SP before calls, we may need to indicate this to the unwinder
    // using GNU_ARGS_SIZE. Note that this may be necessary even when
    // Amount == 0, because the preceding function may have set a non-0
    // GNU_ARGS_SIZE.
    // TODO: We don't need to reset this between subsequent functions,
    // if it didn't change.
    bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();

    if (HasDwarfEHHandlers && !isDestroy &&
        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

    if (Amount == 0)
      return I;

    // Factor out the amount that gets handled inside the sequence
    // (pushes of arguments for frame setup, callee pops for frame destroy).
    Amount -= InternalAmt;

    // TODO: This is needed only if we require precise CFA.
    // If this is a callee-pop calling convention, emit a CFA adjust for
    // the amount the callee popped.
    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

    // Add Amount to SP to destroy a frame, or subtract to setup.
    int64_t StackAdjustment = isDestroy ? Amount : -Amount;

    if (StackAdjustment) {
      // Merge with any previous or following adjustment instruction. Note: the
      // instructions merged with here do not have CFI, so their stack
      // adjustments do not feed into CfaAdjustment.
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

      if (StackAdjustment) {
        if (!(F.hasMinSize() &&
              adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
          BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                               /*InEpilogue=*/false);
      }
    }

    if (DwarfCFI && !hasFP(MF)) {
      // If we don't have FP, but need to generate unwind information,
      // we need to set the correct CFA offset after the stack adjustment.
      // How much we adjust the CFA offset depends on whether we're emitting
      // CFI only for EH purposes or for debugging. EH only requires the CFA
      // offset to be correct at each call site, while for debugging we want
      // it to be more precise.

      int64_t CfaAdjustment = -StackAdjustment;
      // TODO: When not using precise CFA, we also need to adjust for the
      // InternalAmt here.
      if (CfaAdjustment) {
        BuildCFI(MBB, InsertPos, DL,
                 MCCFIInstruction::createAdjustCfaOffset(nullptr,
                                                         CfaAdjustment));
      }
    }

    return I;
  }

  if (isDestroy && InternalAmt) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.  We do this until we have
    // more advanced stack pointer tracking ability.
    // We are not tracking the stack pointer adjustment by the callee, so make
    // sure we restore the stack pointer immediately after the call; there may
    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
    MachineBasicBlock::iterator CI = I;
    MachineBasicBlock::iterator B = MBB.begin();
    while (CI != B && !std::prev(CI)->isCall())
      --CI;
    BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
  }

  return I;
}
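// A minimal sketch of the adjustment arithmetic above: the outgoing-argument
// area is rounded up to the stack alignment, the portion already handled
// inside the call sequence (argument pushes on setup, callee pops on destroy)
// is factored out, and the sign picks between SUB (setup) and ADD (destroy).
// alignTo is re-implemented here so the sketch is self-contained.
#include <cstdint>

namespace call_frame_sketch {
uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int64_t stackAdjustment(bool IsDestroy, uint64_t Amount, uint64_t InternalAmt,
                        unsigned StackAlign) {
  Amount = alignTo(Amount, StackAlign);
  Amount -= InternalAmt;
  return IsDestroy ? (int64_t)Amount : -(int64_t)Amount;
}
} // namespace call_frame_sketch
// E.g. stackAdjustment(false, 20, 0, 16) == -32: a call needing 20 bytes of
// outgoing arguments on a 16-byte-aligned stack becomes "sub esp, 32".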
bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  assert(MBB.getParent() && "Block is not attached to a function!");
  const MachineFunction &MF = *MBB.getParent();
  return !TRI->needsStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
}

bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  assert(MBB.getParent() && "Block is not attached to a function!");

  // Win64 has strict requirements in terms of epilogue and we are
  // not taking a chance at messing with them.
  // I.e., unless this block is already an exit block, we can't use
  // it as an epilogue.
  if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
    return false;

  if (canUseLEAForSPInEpilogue(*MBB.getParent()))
    return true;

  // If we cannot use LEA to adjust SP, we may need to use ADD, which
  // clobbers EFLAGS. Check that we do not need to preserve them;
  // otherwise, conservatively assume it is not safe to insert the
  // epilogue here.
  return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
}

bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // If we may need to emit frameless compact unwind information, give
  // up as this is currently broken: PR25614.
  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
         // The lowering of segmented stacks and HiPE only supports entry
         // blocks as prologue blocks: PR26107.
         // This limitation may be lifted if we fix:
         // - adjustForSegmentedStacks
         // - adjustForHiPEPrologue
         MF.getFunction().getCallingConv() != CallingConv::HiPE &&
         !MF.shouldSplitStack();
}
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool RestoreSP) const {
  assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
  assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
  assert(STI.is32Bit() && !Uses64BitFramePtr &&
         "restoring EBP/ESI on non-32-bit target");

  MachineFunction &MF = *MBB.getParent();
  unsigned FramePtr = TRI->getFrameRegister(MF);
  unsigned BasePtr = TRI->getBaseRegister();
  WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // FIXME: Don't set FrameSetup flag in catchret case.

  int FI = FuncInfo.EHRegNodeFrameIndex;
  int EHRegSize = MFI.getObjectSize(FI);

  if (RestoreSP) {
    // MOV32rm -EHRegSize(%ebp), %esp
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
                 X86::EBP, true, -EHRegSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  unsigned UsedReg;
  int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
  int EndOffset = -EHRegOffset - EHRegSize;
  FuncInfo.EHRegNodeEndOffset = EndOffset;

  if (UsedReg == FramePtr) {
    // ADD $offset, %ebp
    unsigned ADDri = getADDriOpcode(false, EndOffset);
    BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
        .addReg(FramePtr)
        .addImm(EndOffset)
        .setMIFlag(MachineInstr::FrameSetup)
        ->getOperand(3)
        .setIsDead();
    assert(EndOffset >= 0 &&
           "end of registration object above normal EBP position!");
  } else if (UsedReg == BasePtr) {
    // LEA offset(%ebp), %esi
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
                 FramePtr, false, EndOffset)
        .setMIFlag(MachineInstr::FrameSetup);
    // MOV32rm SavedEBPOffset(%esi), %ebp
    assert(X86FI->getHasSEHFramePtrSave());
    int Offset =
        getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
    assert(UsedReg == BasePtr);
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
                 UsedReg, true, Offset)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
  }
  return MBBI;
}

int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  return TRI->getSlotSize();
}

unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
    const {
  return TRI->getDwarfRegNum(StackPtr, true);
}
namespace {
// Struct used by orderFrameObjects to help sort the stack objects.
struct X86FrameSortingObject {
  bool IsValid = false;         // true if we care about this Object.
  unsigned ObjectIndex = 0;     // Index of Object into MFI list.
  unsigned ObjectSize = 0;      // Size of Object in bytes.
  unsigned ObjectAlignment = 1; // Alignment of Object in bytes.
  unsigned ObjectNumUses = 0;   // Object static number of uses.
};

// The comparison function we use for llvm::stable_sort to order our local
// stack symbols. The current algorithm is to use an estimated
// "density". This takes into consideration the size and number of
// uses each object has in order to roughly minimize code size.
// So, for example, an object of size 16B that is referenced 5 times
// will get higher priority than 4 4B objects referenced 1 time each.
// It's not perfect and we may be able to squeeze a few more bytes out of
// it (for example: 0(esp) requires fewer bytes, symbols allocated at the
// fringe end can have special consideration, given their size is less
// important, etc.), but the algorithmic complexity grows too much to be
// worth the extra gains we get. This gets us pretty close.
// The final order leaves us with objects with highest priority going
// at the end of our list.
struct X86FrameSortingComparator {
  inline bool operator()(const X86FrameSortingObject &A,
                         const X86FrameSortingObject &B) {
    uint64_t DensityAScaled, DensityBScaled;

    // For consistency in our comparison, all invalid objects are placed
    // at the end. This also allows us to stop walking when we hit the
    // first invalid item after it's all sorted.
    if (!A.IsValid)
      return false;
    if (!B.IsValid)
      return true;

    // The density is calculated by doing:
    //     (double)DensityA = A.ObjectNumUses / A.ObjectSize
    //     (double)DensityB = B.ObjectNumUses / B.ObjectSize
    // Since this approach may cause inconsistencies in
    // the floating point <, >, == comparisons, depending on the floating
    // point model with which the compiler was built, we're going
    // to scale both sides by multiplying with
    // A.ObjectSize * B.ObjectSize. This ends up factoring away
    // the division and, with it, the need for any floating point
    // arithmetic.
    DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
      static_cast<uint64_t>(B.ObjectSize);
    DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
      static_cast<uint64_t>(A.ObjectSize);

    // If the two densities are equal, prioritize highest alignment
    // objects. This allows for similar alignment objects
    // to be packed together (given the same density).
    // There's room for improvement here, also, since we can pack
    // similar alignment (different density) objects next to each
    // other to save padding. This will also require further
    // complexity/iterations, and the overall gain isn't worth it,
    // in general. Something to keep in mind, though.
    if (DensityAScaled == DensityBScaled)
      return A.ObjectAlignment < B.ObjectAlignment;

    return DensityAScaled < DensityBScaled;
  }
};
} // namespace
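// The cross-multiplication trick above, in isolation: comparing
// usesA/sizeA > usesB/sizeB is done as usesA*sizeB > usesB*sizeA, which is
// exact in integer arithmetic. A minimal sketch:
#include <cstdint>

namespace density_sketch {
bool denserThan(unsigned UsesA, unsigned SizeA,
                unsigned UsesB, unsigned SizeB) {
  return (uint64_t)UsesA * SizeB > (uint64_t)UsesB * SizeA;
}
} // namespace density_sketch
// E.g. a 16-byte object used 5 times (density 0.3125) beats a 4-byte object
// used once (0.25): 5*4 = 20 > 1*16 = 16, with no floating point involved.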
// Order the symbols in the local stack.
// We want to place the local stack objects in some sort of sensible order.
// The heuristic we use is to try and pack them according to static number
// of uses and size of object in order to minimize code size.
void X86FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Don't waste time if there's nothing to do.
  if (ObjectsToAllocate.empty())
    return;

  // Create an array of all MFI objects. We won't need all of these
  // objects, but we're going to create a full array of them to make
  // it easier to index into when we're counting "uses" down below.
  // We want to be able to easily/cheaply access an object by simply
  // indexing into it, instead of having to search for it every time.
  std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());

  // Walk the objects we care about and mark them as such in our working
  // struct.
  for (auto &Obj : ObjectsToAllocate) {
    SortingObjects[Obj].IsValid = true;
    SortingObjects[Obj].ObjectIndex = Obj;
    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj);
    // Set the size.
    int ObjectSize = MFI.getObjectSize(Obj);
    if (ObjectSize == 0)
      // Variable size. Just use 4.
      SortingObjects[Obj].ObjectSize = 4;
    else
      SortingObjects[Obj].ObjectSize = ObjectSize;
  }

  // Count the number of uses for each object.
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (const MachineOperand &MO : MI.operands()) {
        // Check to see if it's a local stack symbol.
        if (!MO.isFI())
          continue;
        int Index = MO.getIndex();
        // Check to see if it falls within our range, and is tagged
        // to require ordering.
        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
            SortingObjects[Index].IsValid)
          SortingObjects[Index].ObjectNumUses++;
      }
    }
  }

  // Sort the objects using X86FrameSortingComparator (see its comment for
  // info).
  llvm::stable_sort(SortingObjects, X86FrameSortingComparator());

  // Now modify the original list to represent the final order that
  // we want. The order will depend on whether we're going to access them
  // from the stack pointer or the frame pointer. For SP, the list should
  // end with the objects that we want to have at smaller offsets.
  // For FP, it should be flipped.
  int i = 0;
  for (auto &Obj : SortingObjects) {
    // All invalid items are sorted at the end, so it's safe to stop.
    if (!Obj.IsValid)
      break;
    ObjectsToAllocate[i++] = Obj.ObjectIndex;
  }

  // Flip it if we're accessing off of the FP.
  if (!TRI->needsStackRealignment(MF) && hasFP(MF))
    std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
}
unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
  // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
  unsigned Offset = 16;
  // RBP is immediately pushed.
  Offset += SlotSize;
  // All callee-saved registers are then pushed.
  Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
  // Every funclet allocates enough stack space for the largest outgoing call.
  Offset += getWinEHFuncletFrameSize(MF);
  return Offset;
}
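// A worked instance of the layout above, under assumed sizes (illustrative,
// not taken from any real function): with SlotSize = 8, two callee-saved GPRs
// pushed (16 bytes of CalleeSavedFrameSize), and a 32-byte funclet frame, the
// parent frame pointer sits 16 + 8 + 16 + 32 = 72 bytes above the funclet's
// RSP.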
void X86FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // Mark the function as not having WinCFI. We will set it back to true in
  // emitPrologue if it gets called and emits CFI.
  MF.setHasWinCFI(false);

  // If this function isn't doing Win64-style C++ EH, we don't need to do
  // anything.
  const Function &F = MF.getFunction();
  if (!STI.is64Bit() || !MF.hasEHFunclets() ||
      classifyEHPersonality(F.getPersonalityFn()) != EHPersonality::MSVC_CXX)
    return;

  // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
  // relative to RSP after the prologue.  Find the offset of the last fixed
  // object, so that we can allocate a slot immediately following it. If there
  // were no fixed objects, use offset -SlotSize, which is immediately after the
  // return address. Fixed objects have negative frame indices.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
  int64_t MinFixedObjOffset = -SlotSize;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));

  for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
    for (WinEHHandlerType &H : TBME.HandlerArray) {
      int FrameIndex = H.CatchObj.FrameIndex;
      if (FrameIndex != INT_MAX) {
        // Ensure alignment.
        unsigned Align = MFI.getObjectAlignment(FrameIndex);
        MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
        MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
        MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
      }
    }
  }

  // Ensure alignment.
  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
  int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
  int UnwindHelpFI =
      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

  // Store -2 into UnwindHelp on function entry. We have to scan forwards past
  // other frame setup instructions.
  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  DebugLoc DL = MBB.findDebugLoc(MBBI);
  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                    UnwindHelpFI)
      .addImm(-2);
}
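// A minimal sketch of the offset arithmetic above: fixed objects sit at
// negative offsets below the return address, alignment is applied by moving
// further away from zero, and UnwindHelp lands one slot below the lowest
// fixed object. Inputs are illustrative.
#include <cstdint>
#include <cstdlib>

namespace unwind_help_sketch {
int64_t unwindHelpOffset(int64_t MinFixedObjOffset, int64_t SlotSize) {
  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8; // 8-byte align, downward
  return MinFixedObjOffset - SlotSize;                  // the slot below it
}
} // namespace unwind_help_sketch
// E.g. with the lowest fixed object at -20, alignment gives -24 and UnwindHelp
// is created at -32 (SlotSize = 8).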