Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//
9
//===----------------------------------------------------------------------===//
10
11
#include "HexagonFrameLowering.h"
12
#include "HexagonBlockRanges.h"
13
#include "HexagonInstrInfo.h"
14
#include "HexagonMachineFunctionInfo.h"
15
#include "HexagonRegisterInfo.h"
16
#include "HexagonSubtarget.h"
17
#include "HexagonTargetMachine.h"
18
#include "MCTargetDesc/HexagonBaseInfo.h"
19
#include "llvm/ADT/BitVector.h"
20
#include "llvm/ADT/DenseMap.h"
21
#include "llvm/ADT/None.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/PostOrderIterator.h"
24
#include "llvm/ADT/SetVector.h"
25
#include "llvm/ADT/SmallSet.h"
26
#include "llvm/ADT/SmallVector.h"
27
#include "llvm/CodeGen/LivePhysRegs.h"
28
#include "llvm/CodeGen/MachineBasicBlock.h"
29
#include "llvm/CodeGen/MachineDominators.h"
30
#include "llvm/CodeGen/MachineFrameInfo.h"
31
#include "llvm/CodeGen/MachineFunction.h"
32
#include "llvm/CodeGen/MachineFunctionPass.h"
33
#include "llvm/CodeGen/MachineInstr.h"
34
#include "llvm/CodeGen/MachineInstrBuilder.h"
35
#include "llvm/CodeGen/MachineMemOperand.h"
36
#include "llvm/CodeGen/MachineModuleInfo.h"
37
#include "llvm/CodeGen/MachineOperand.h"
38
#include "llvm/CodeGen/MachinePostDominators.h"
39
#include "llvm/CodeGen/MachineRegisterInfo.h"
40
#include "llvm/CodeGen/RegisterScavenging.h"
41
#include "llvm/IR/Attributes.h"
42
#include "llvm/IR/DebugLoc.h"
43
#include "llvm/IR/Function.h"
44
#include "llvm/MC/MCDwarf.h"
45
#include "llvm/MC/MCRegisterInfo.h"
46
#include "llvm/Pass.h"
47
#include "llvm/Support/CodeGen.h"
48
#include "llvm/Support/CommandLine.h"
49
#include "llvm/Support/Compiler.h"
50
#include "llvm/Support/Debug.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/Support/MathExtras.h"
53
#include "llvm/Support/raw_ostream.h"
54
#include "llvm/Target/TargetMachine.h"
55
#include "llvm/Target/TargetOptions.h"
56
#include "llvm/Target/TargetRegisterInfo.h"
57
#include <algorithm>
58
#include <cassert>
59
#include <cstdint>
60
#include <iterator>
61
#include <limits>
62
#include <map>
63
#include <utility>
64
#include <vector>
65
66
#define DEBUG_TYPE "hexagon-pei"
67
68
// Hexagon stack frame layout as defined by the ABI:
69
//
70
//                                                       Incoming arguments
71
//                                                       passed via stack
72
//                                                                      |
73
//                                                                      |
74
//        SP during function's                 FP during function's     |
75
//    +-- runtime (top of stack)               runtime (bottom) --+     |
76
//    |                                                           |     |
77
// --++---------------------+------------------+-----------------++-+-------
78
//   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
79
//   |   called functions   |  local objects   |  local objects  |FP|
80
// --+----------------------+------------------+-----------------+--+-------
81
//    <-    size known    -> <- size unknown -> <- size known  ->
82
//
83
// Low address                                                 High address
84
//
85
// <--- stack growth
86
//
87
//
88
// - In any circumstances, the outgoing function arguments are always accessi-
89
//   ble using the SP, and the incoming arguments are accessible using the FP.
90
// - If the local objects are not aligned, they can always be accessed using
91
//   the FP.
92
// - If there are no variable-sized objects, the local objects can always be
93
//   accessed using the SP, regardless whether they are aligned or not. (The
94
//   alignment padding will be at the bottom of the stack (highest address),
95
//   and so the offset with respect to the SP will be known at the compile-
96
//   -time.)
97
//
98
// The only complication occurs if there are both, local aligned objects, and
99
// dynamically allocated (variable-sized) objects. The alignment pad will be
100
// placed between the FP and the local objects, thus preventing the use of the
101
// FP to access the local objects. At the same time, the variable-sized objects
102
// will be between the SP and the local objects, thus introducing an unknown
103
// distance from the SP to the locals.
104
//
105
// To avoid this problem, a new register is created that holds the aligned
106
// address of the bottom of the stack, referred in the sources as AP (aligned
107
// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
108
// that aligns AP to the required boundary (a maximum of the alignments of
109
// all stack objects, fixed- and variable-sized). All local objects[1] will
110
// then use AP as the base pointer.
111
// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
112
// their name from being allocated at fixed locations on the stack, relative
113
// to the FP. In the presence of dynamic allocation and local alignment, such
114
// objects can only be accessed through the FP.
115
//
116
// Illustration of the AP:
117
//                                                                FP --+
118
//                                                                     |
119
// ---------------+---------------------+-----+-----------------------++-+--
120
//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
121
//   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
122
// ---------------+---------------------+-----+-----------------+-----+--+--
123
//                                      |<-- Multiple of the -->|
124
//                                           stack alignment    +-- AP
125
//
126
// The AP is set up at the beginning of the function. Since it is not a dedi-
127
// cated (reserved) register, it needs to be kept live throughout the function
128
// to be available as the base register for local object accesses.
129
// Normally, an address of a stack objects is obtained by a pseudo-instruction
130
// PS_fi. To access local objects with the AP register present, a different
131
// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
132
// argument compared to PS_fi: the first input register is the AP register.
133
// This keeps the register live between its definition and its uses.
134
135
// The AP register is originally set up using pseudo-instruction PS_aligna:
136
//   AP = PS_aligna A
137
// where
138
//   A  - required stack alignment
139
// The alignment value must be the maximum of all alignments required by
140
// any stack object.
141
142
// The dynamic allocation uses a pseudo-instruction PS_alloca:
143
//   Rd = PS_alloca Rs, A
144
// where
145
//   Rd - address of the allocated space
146
//   Rs - minimum size (the actual allocated can be larger to accommodate
147
//        alignment)
148
//   A  - required alignment
149
150
using namespace llvm;
151
152
static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
153
    cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
154
155
static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
156
    cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
157
    cl::ZeroOrMore);
158
159
static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
160
    cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
161
    cl::init(6), cl::ZeroOrMore);
162
163
static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
164
    cl::Hidden, cl::desc("Specify Os spill func threshold"),
165
    cl::init(1), cl::ZeroOrMore);
166
167
static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
168
    cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
169
    cl::init(false), cl::ZeroOrMore);
170
171
static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
172
    cl::init(true), cl::Hidden, cl::ZeroOrMore,
173
    cl::desc("Enable stack frame shrink wrapping"));
174
175
static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
176
    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
177
    cl::desc("Max count of stack frame shrink-wraps"));
178
179
static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
180
    cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
181
    cl::init(false), cl::ZeroOrMore);
182
183
static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
184
    cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
185
186
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
187
    cl::init(true), cl::desc("Optimize spill slots"));
188
189
#ifndef NDEBUG
190
static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
191
    cl::init(std::numeric_limits<unsigned>::max()));
192
static unsigned SpillOptCount = 0;
193
#endif
194
195
namespace llvm {
196
197
  void initializeHexagonCallFrameInformationPass(PassRegistry&);
198
  FunctionPass *createHexagonCallFrameInformation();
199
200
} // end namespace llvm
201
202
namespace {
203
204
  class HexagonCallFrameInformation : public MachineFunctionPass {
205
  public:
206
    static char ID;
207
208
441
    HexagonCallFrameInformation() : MachineFunctionPass(ID) {
209
441
      PassRegistry &PR = *PassRegistry::getPassRegistry();
210
441
      initializeHexagonCallFrameInformationPass(PR);
211
441
    }
212
213
    bool runOnMachineFunction(MachineFunction &MF) override;
214
215
440
    MachineFunctionProperties getRequiredProperties() const override {
216
440
      return MachineFunctionProperties().set(
217
440
          MachineFunctionProperties::Property::NoVRegs);
218
440
    }
219
  };
220
221
  char HexagonCallFrameInformation::ID = 0;
222
223
} // end anonymous namespace
224
225
2.41k
bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
226
2.41k
  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
227
2.41k
  bool NeedCFI = MF.getMMI().hasDebugInfo() ||
228
2.40k
                 MF.getFunction()->needsUnwindTableEntry();
229
2.41k
230
2.41k
  if (!NeedCFI)
231
754
    return false;
232
1.65k
  HFI.insertCFIInstructions(MF);
233
1.65k
  return true;
234
1.65k
}
235
236
INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
237
                "Hexagon call frame information", false, false)
238
239
441
FunctionPass *llvm::createHexagonCallFrameInformation() {
240
441
  return new HexagonCallFrameInformation();
241
441
}
242
243
/// Map a register pair Reg to the subregister that has the greater "number",
244
/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
245
static unsigned getMax32BitSubRegister(unsigned Reg,
246
                                       const TargetRegisterInfo &TRI,
247
47
                                       bool hireg = true) {
248
47
    if (
Reg < Hexagon::D0 || 47
Reg > Hexagon::D1547
)
249
0
      return Reg;
250
47
251
47
    unsigned RegNo = 0;
252
141
    for (MCSubRegIterator SubRegs(Reg, &TRI); 
SubRegs.isValid()141
;
++SubRegs94
) {
253
94
      if (
hireg94
) {
254
94
        if (*SubRegs > RegNo)
255
94
          RegNo = *SubRegs;
256
0
      } else {
257
0
        if (
!RegNo || 0
*SubRegs < RegNo0
)
258
0
          RegNo = *SubRegs;
259
0
      }
260
94
    }
261
47
    return RegNo;
262
47
}
263
264
/// Returns the callee saved register with the largest id in the vector.
265
static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
266
13
                                     const TargetRegisterInfo &TRI) {
267
13
    static_assert(Hexagon::R1 > 0,
268
13
                  "Assume physical registers are encoded as positive integers");
269
13
    if (CSI.empty())
270
0
      return 0;
271
13
272
13
    unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
273
47
    for (unsigned I = 1, E = CSI.size(); 
I < E47
;
++I34
) {
274
34
      unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
275
34
      if (Reg > Max)
276
34
        Max = Reg;
277
34
    }
278
13
    return Max;
279
13
}
280
281
/// Checks if the basic block contains any instruction that needs a stack
282
/// frame to be already in place.
283
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
284
2.52k
                            const HexagonRegisterInfo &HRI) {
285
9.36k
    for (auto &I : MBB) {
286
9.36k
      const MachineInstr *MI = &I;
287
9.36k
      if (MI->isCall())
288
85
        return true;
289
9.27k
      unsigned Opc = MI->getOpcode();
290
9.27k
      switch (Opc) {
291
2
        case Hexagon::PS_alloca:
292
2
        case Hexagon::PS_aligna:
293
2
          return true;
294
9.27k
        default:
295
9.27k
          break;
296
9.27k
      }
297
9.27k
      // Check individual operands.
298
9.27k
      
for (const MachineOperand &MO : MI->operands()) 9.27k
{
299
21.4k
        // While the presence of a frame index does not prove that a stack
300
21.4k
        // frame will be required, all frame indexes should be within alloc-
301
21.4k
        // frame/deallocframe. Otherwise, the code that translates a frame
302
21.4k
        // index into an offset would have to be aware of the placement of
303
21.4k
        // the frame creation/destruction instructions.
304
21.4k
        if (MO.isFI())
305
1.64k
          return true;
306
19.7k
        
if (19.7k
MO.isReg()19.7k
) {
307
17.7k
          unsigned R = MO.getReg();
308
17.7k
          // Virtual registers will need scavenging, which then may require
309
17.7k
          // a stack slot.
310
17.7k
          if (TargetRegisterInfo::isVirtualRegister(R))
311
1
            return true;
312
44.8k
          
for (MCSubRegIterator S(R, &HRI, true); 17.7k
S.isValid()44.8k
;
++S27.0k
)
313
27.1k
            
if (27.1k
CSR[*S]27.1k
)
314
74
              return true;
315
17.7k
          continue;
316
1.98k
        }
317
1.98k
        
if (1.98k
MO.isRegMask()1.98k
) {
318
0
          // A regmask would normally have all callee-saved registers marked
319
0
          // as preserved, so this check would not be needed, but in case of
320
0
          // ever having other regmasks (for other calling conventions),
321
0
          // make sure they would be processed correctly.
322
0
          const uint32_t *BM = MO.getRegMask();
323
0
          for (int x = CSR.find_first(); 
x >= 00
;
x = CSR.find_next(x)0
) {
324
0
            unsigned R = x;
325
0
            // If this regmask does not preserve a CSR, a frame will be needed.
326
0
            if (!(BM[R/32] & (1u << (R%32))))
327
0
              return true;
328
0
          }
329
0
        }
330
21.4k
      }
331
9.36k
    }
332
711
    return false;
333
2.52k
}
334
335
  /// Returns true if MBB has a machine instructions that indicates a tail call
336
  /// in the block.
337
8
static bool hasTailCall(const MachineBasicBlock &MBB) {
338
8
    MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
339
8
    unsigned RetOpc = I->getOpcode();
340
7
    return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
341
8
}
342
343
/// Returns true if MBB contains an instruction that returns.
344
7
static bool hasReturn(const MachineBasicBlock &MBB) {
345
7
    for (auto I = MBB.getFirstTerminator(), E = MBB.end(); 
I != E7
;
++I0
)
346
7
      
if (7
I->isReturn()7
)
347
7
        return true;
348
0
    return false;
349
7
}
350
351
/// Returns the "return" instruction from this block, or nullptr if there
352
/// isn't any.
353
3.31k
static MachineInstr *getReturn(MachineBasicBlock &MBB) {
354
3.31k
    for (auto &I : MBB)
355
21.9k
      
if (21.9k
I.isReturn()21.9k
)
356
2.35k
        return &I;
357
957
    return nullptr;
358
957
}
359
360
2.39k
static bool isRestoreCall(unsigned Opc) {
361
2.39k
    switch (Opc) {
362
7
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
363
7
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
364
7
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
365
7
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
366
7
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
367
7
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
368
7
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
369
7
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
370
7
        return true;
371
2.39k
    }
372
2.39k
    return false;
373
2.39k
}
374
375
2.40k
static inline bool isOptNone(const MachineFunction &MF) {
376
2.40k
    return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) ||
377
2.40k
           MF.getTarget().getOptLevel() == CodeGenOpt::None;
378
2.40k
}
379
380
125
static inline bool isOptSize(const MachineFunction &MF) {
381
125
    const Function &F = *MF.getFunction();
382
34
    return F.optForSize() && !F.optForMinSize();
383
125
}
384
385
116
static inline bool isMinSize(const MachineFunction &MF) {
386
116
    return MF.getFunction()->optForMinSize();
387
116
}
388
389
/// Implements shrink-wrapping of the stack frame. By default, stack frame
390
/// is created in the function entry block, and is cleaned up in every block
391
/// that returns. This function finds alternate blocks: one for the frame
392
/// setup (prolog) and one for the cleanup (epilog).
393
void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
394
2.40k
      MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
395
2.40k
  static unsigned ShrinkCounter = 0;
396
2.40k
397
2.40k
  if (
ShrinkLimit.getPosition()2.40k
) {
398
0
    if (ShrinkCounter >= ShrinkLimit)
399
0
      return;
400
0
    ShrinkCounter++;
401
0
  }
402
2.40k
403
2.40k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
404
2.40k
405
2.40k
  MachineDominatorTree MDT;
406
2.40k
  MDT.runOnMachineFunction(MF);
407
2.40k
  MachinePostDominatorTree MPT;
408
2.40k
  MPT.runOnMachineFunction(MF);
409
2.40k
410
2.40k
  using UnsignedMap = DenseMap<unsigned, unsigned>;
411
2.40k
  using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
412
2.40k
413
2.40k
  UnsignedMap RPO;
414
2.40k
  RPOTType RPOT(&MF);
415
2.40k
  unsigned RPON = 0;
416
6.02k
  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); 
I != E6.02k
;
++I3.61k
)
417
3.61k
    RPO[(*I)->getNumber()] = RPON++;
418
2.40k
419
2.40k
  // Don't process functions that have loops, at least for now. Placement
420
2.40k
  // of prolog and epilog must take loop structure into account. For simpli-
421
2.40k
  // city don't do it right now.
422
3.20k
  for (auto &I : MF) {
423
3.20k
    unsigned BN = RPO[I.getNumber()];
424
4.34k
    for (auto SI = I.succ_begin(), SE = I.succ_end(); 
SI != SE4.34k
;
++SI1.13k
) {
425
1.29k
      // If found a back-edge, return.
426
1.29k
      if (RPO[(*SI)->getNumber()] <= BN)
427
152
        return;
428
1.29k
    }
429
3.20k
  }
430
2.40k
431
2.40k
  // Collect the set of blocks that need a stack frame to execute. Scan
432
2.40k
  // each block for uses/defs of callee-saved registers, calls, etc.
433
2.25k
  SmallVector<MachineBasicBlock*,16> SFBlocks;
434
2.25k
  BitVector CSR(Hexagon::NUM_TARGET_REGS);
435
29.3k
  for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); 
*P29.3k
;
++P27.0k
)
436
54.1k
    
for (MCSubRegIterator S(*P, &HRI, true); 27.0k
S.isValid()54.1k
;
++S27.0k
)
437
27.0k
      CSR[*S] = true;
438
2.25k
439
2.25k
  for (auto &I : MF)
440
2.52k
    
if (2.52k
needsStackFrame(I, CSR, HRI)2.52k
)
441
1.80k
      SFBlocks.push_back(&I);
442
2.25k
443
2.25k
  DEBUG({
444
2.25k
    dbgs() << "Blocks needing SF: {";
445
2.25k
    for (auto &B : SFBlocks)
446
2.25k
      dbgs() << " BB#" << B->getNumber();
447
2.25k
    dbgs() << " }\n";
448
2.25k
  });
449
2.25k
  // No frame needed?
450
2.25k
  if (SFBlocks.empty())
451
540
    return;
452
1.71k
453
1.71k
  // Pick a common dominator and a common post-dominator.
454
1.71k
  MachineBasicBlock *DomB = SFBlocks[0];
455
1.80k
  for (unsigned i = 1, n = SFBlocks.size(); 
i < n1.80k
;
++i93
) {
456
93
    DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
457
93
    if (!DomB)
458
0
      break;
459
93
  }
460
1.71k
  MachineBasicBlock *PDomB = SFBlocks[0];
461
1.77k
  for (unsigned i = 1, n = SFBlocks.size(); 
i < n1.77k
;
++i62
) {
462
72
    PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
463
72
    if (!PDomB)
464
10
      break;
465
72
  }
466
1.71k
  DEBUG({
467
1.71k
    dbgs() << "Computed dom block: BB#";
468
1.71k
    if (DomB) dbgs() << DomB->getNumber();
469
1.71k
    else      dbgs() << "<null>";
470
1.71k
    dbgs() << ", computed pdom block: BB#";
471
1.71k
    if (PDomB) dbgs() << PDomB->getNumber();
472
1.71k
    else       dbgs() << "<null>";
473
1.71k
    dbgs() << "\n";
474
1.71k
  });
475
1.71k
  if (
!DomB || 1.71k
!PDomB1.71k
)
476
10
    return;
477
1.70k
478
1.70k
  // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
479
1.70k
  
if (1.70k
!MDT.dominates(DomB, PDomB)1.70k
) {
480
0
    DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
481
0
    return;
482
0
  }
483
1.70k
  
if (1.70k
!MPT.dominates(PDomB, DomB)1.70k
) {
484
0
    DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
485
0
    return;
486
0
  }
487
1.70k
488
1.70k
  // Finally, everything seems right.
489
1.70k
  PrologB = DomB;
490
1.70k
  EpilogB = PDomB;
491
1.70k
}
492
493
/// Perform most of the PEI work here:
494
/// - saving/restoring of the callee-saved registers,
495
/// - stack frame creation and destruction.
496
/// Normally, this work is distributed among various functions, but doing it
497
/// in one place allows shrink-wrapping of the stack frame.
498
void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
499
2.40k
                                        MachineBasicBlock &MBB) const {
500
2.40k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
501
2.40k
502
2.40k
  MachineFrameInfo &MFI = MF.getFrameInfo();
503
2.40k
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
504
2.40k
505
2.40k
  MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
506
2.40k
  if (EnableShrinkWrapping)
507
2.40k
    findShrunkPrologEpilog(MF, PrologB, EpilogB);
508
2.40k
509
2.40k
  bool PrologueStubs = false;
510
2.40k
  insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
511
2.40k
  insertPrologueInBlock(*PrologB, PrologueStubs);
512
2.40k
  updateEntryPaths(MF, *PrologB);
513
2.40k
514
2.40k
  if (
EpilogB2.40k
) {
515
1.70k
    insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
516
1.70k
    insertEpilogueInBlock(*EpilogB);
517
2.40k
  } else {
518
702
    for (auto &B : MF)
519
1.78k
      
if (1.78k
B.isReturnBlock()1.78k
)
520
701
        insertCSRRestoresInBlock(B, CSI, HRI);
521
702
522
702
    for (auto &B : MF)
523
1.78k
      
if (1.78k
B.isReturnBlock()1.78k
)
524
701
        insertEpilogueInBlock(B);
525
702
526
1.78k
    for (auto &B : MF) {
527
1.78k
      if (B.empty())
528
139
        continue;
529
1.64k
      MachineInstr *RetI = getReturn(B);
530
1.64k
      if (
!RetI || 1.64k
isRestoreCall(RetI->getOpcode())701
)
531
944
        continue;
532
701
      for (auto &R : CSI)
533
48
        RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
534
1.78k
    }
535
702
  }
536
2.40k
537
2.40k
  if (
EpilogB2.40k
) {
538
1.70k
    // If there is an epilog block, it may not have a return instruction.
539
1.70k
    // In such case, we need to add the callee-saved registers as live-ins
540
1.70k
    // in all blocks on all paths from the epilog to any return block.
541
1.70k
    unsigned MaxBN = MF.getNumBlockIDs();
542
1.70k
    BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
543
1.70k
    updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
544
1.70k
  }
545
2.40k
}
546
547
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
548
2.40k
      bool PrologueStubs) const {
549
2.40k
  MachineFunction &MF = *MBB.getParent();
550
2.40k
  MachineFrameInfo &MFI = MF.getFrameInfo();
551
2.40k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
552
2.40k
  auto &HII = *HST.getInstrInfo();
553
2.40k
  auto &HRI = *HST.getRegisterInfo();
554
2.40k
555
2.40k
  unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
556
2.40k
557
2.40k
  // Calculate the total stack frame size.
558
2.40k
  // Get the number of bytes to allocate from the FrameInfo.
559
2.40k
  unsigned FrameSize = MFI.getStackSize();
560
2.40k
  // Round up the max call frame size to the max alignment on the stack.
561
2.40k
  unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
562
2.40k
  MFI.setMaxCallFrameSize(MaxCFA);
563
2.40k
564
2.40k
  FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
565
2.40k
  MFI.setStackSize(FrameSize);
566
2.40k
567
2.40k
  bool AlignStack = (MaxAlign > getStackAlignment());
568
2.40k
569
2.40k
  // Get the number of bytes to allocate from the FrameInfo.
570
2.40k
  unsigned NumBytes = MFI.getStackSize();
571
2.40k
  unsigned SP = HRI.getStackRegister();
572
2.40k
  unsigned MaxCF = MFI.getMaxCallFrameSize();
573
2.40k
  MachineBasicBlock::iterator InsertPt = MBB.begin();
574
2.40k
575
2.40k
  SmallVector<MachineInstr *, 4> AdjustRegs;
576
2.40k
  for (auto &MBB : MF)
577
3.61k
    for (auto &MI : MBB)
578
21.1k
      
if (21.1k
MI.getOpcode() == Hexagon::PS_alloca21.1k
)
579
3
        AdjustRegs.push_back(&MI);
580
2.40k
581
3
  for (auto MI : AdjustRegs) {
582
3
    assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
583
3
    expandAlloca(MI, HII, SP, MaxCF);
584
3
    MI->eraseFromParent();
585
3
  }
586
2.40k
587
2.40k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
588
2.40k
589
2.40k
  if (
hasFP(MF)2.40k
) {
590
1.66k
    insertAllocframe(MBB, InsertPt, NumBytes);
591
1.66k
    if (
AlignStack1.66k
) {
592
13
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
593
13
          .addReg(SP)
594
13
          .addImm(-int64_t(MaxAlign));
595
13
    }
596
1.66k
    // If the stack-checking is enabled, and we spilled the callee-saved
597
1.66k
    // registers inline (i.e. did not use a spill function), then call
598
1.66k
    // the stack checker directly.
599
1.66k
    if (
EnableStackOVFSanitizer && 1.66k
!PrologueStubs2
)
600
1
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
601
1
             .addExternalSymbol("__runtime_stack_check");
602
2.40k
  } else 
if (739
NumBytes > 0739
) {
603
63
    assert(alignTo(NumBytes, 8) == NumBytes);
604
63
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
605
63
      .addReg(SP)
606
63
      .addImm(-int(NumBytes));
607
63
  }
608
2.40k
}
609
610
2.40k
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
611
2.40k
  MachineFunction &MF = *MBB.getParent();
612
2.40k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
613
2.40k
  auto &HII = *HST.getInstrInfo();
614
2.40k
  auto &HRI = *HST.getRegisterInfo();
615
2.40k
  unsigned SP = HRI.getStackRegister();
616
2.40k
617
2.40k
  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
618
2.40k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
619
2.40k
620
2.40k
  if (
!hasFP(MF)2.40k
) {
621
740
    MachineFrameInfo &MFI = MF.getFrameInfo();
622
740
    if (unsigned 
NumBytes740
= MFI.getStackSize()) {
623
62
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
624
62
        .addReg(SP)
625
62
        .addImm(NumBytes);
626
62
    }
627
740
    return;
628
740
  }
629
1.66k
630
1.66k
  MachineInstr *RetI = getReturn(MBB);
631
1.66k
  unsigned RetOpc = RetI ? 
RetI->getOpcode()1.65k
:
013
;
632
1.66k
633
1.66k
  // Handle EH_RETURN.
634
1.66k
  if (
RetOpc == Hexagon::EH_RETURN_JMPR1.66k
) {
635
1
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
636
1
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
637
1
        .addReg(SP)
638
1
        .addReg(Hexagon::R28);
639
1
    return;
640
1
  }
641
1.66k
642
1.66k
  // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
643
1.66k
  // frame instruction if we encounter it.
644
1.66k
  
if (1.66k
RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
645
1.66k
      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
646
1.66k
      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
647
1.66k
      
RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC1.65k
) {
648
7
    MachineBasicBlock::iterator It = RetI;
649
7
    ++It;
650
7
    // Delete all instructions after the RESTORE (except labels).
651
14
    while (
It != MBB.end()14
) {
652
7
      if (!It->isLabel())
653
7
        It = MBB.erase(It);
654
7
      else
655
0
        ++It;
656
7
    }
657
7
    return;
658
7
  }
659
1.65k
660
1.65k
  // It is possible that the restoring code is a call to a library function.
661
1.65k
  // All of the restore* functions include "deallocframe", so we need to make
662
1.65k
  // sure that we don't add an extra one.
663
1.65k
  bool NeedsDeallocframe = true;
664
1.65k
  if (
!MBB.empty() && 1.65k
InsertPt != MBB.begin()1.65k
) {
665
1.65k
    MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
666
1.65k
    unsigned COpc = PrevIt->getOpcode();
667
1.65k
    if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
668
1.65k
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
669
1.65k
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
670
1.65k
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
671
1.65k
        
COpc == Hexagon::PS_call_nr1.65k
||
COpc == Hexagon::PS_callr_nr1.64k
)
672
5
      NeedsDeallocframe = false;
673
1.65k
  }
674
1.65k
675
1.65k
  if (!NeedsDeallocframe)
676
5
    return;
677
1.65k
  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
678
1.65k
  // otherwise just add deallocframe. The function could be returning via a
679
1.65k
  // tail call.
680
1.65k
  
if (1.65k
RetOpc != Hexagon::PS_jmpret || 1.65k
DisableDeallocRet1.64k
) {
681
11
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
682
11
    return;
683
11
  }
684
1.64k
  unsigned NewOpc = Hexagon::L4_return;
685
1.64k
  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc));
686
1.64k
  // Transfer the function live-out registers.
687
1.64k
  NewI->copyImplicitOps(MF, *RetI);
688
1.64k
  MBB.erase(RetI);
689
1.64k
}
690
691
void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
692
1.66k
      MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
693
1.66k
  MachineFunction &MF = *MBB.getParent();
694
1.66k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
695
1.66k
  auto &HII = *HST.getInstrInfo();
696
1.66k
  auto &HRI = *HST.getRegisterInfo();
697
1.66k
698
1.66k
  // Check for overflow.
699
1.66k
  // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
700
1.66k
  const unsigned int ALLOCFRAME_MAX = 16384;
701
1.66k
702
1.66k
  // Create a dummy memory operand to avoid allocframe from being treated as
703
1.66k
  // a volatile memory reference.
704
1.66k
  auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
705
1.66k
                                      MachineMemOperand::MOStore, 4, 4);
706
1.66k
707
1.66k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
708
1.66k
709
1.66k
  if (
NumBytes >= ALLOCFRAME_MAX1.66k
) {
710
0
    // Emit allocframe(#0).
711
0
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
712
0
      .addImm(0)
713
0
      .addMemOperand(MMO);
714
0
715
0
    // Subtract the size from the stack pointer.
716
0
    unsigned SP = HRI.getStackRegister();
717
0
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
718
0
      .addReg(SP)
719
0
      .addImm(-int(NumBytes));
720
1.66k
  } else {
721
1.66k
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
722
1.66k
      .addImm(NumBytes)
723
1.66k
      .addMemOperand(MMO);
724
1.66k
  }
725
1.66k
}
726
727
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
728
2.40k
      MachineBasicBlock &SaveB) const {
729
2.40k
  SetVector<unsigned> Worklist;
730
2.40k
731
2.40k
  MachineBasicBlock &EntryB = MF.front();
732
2.40k
  Worklist.insert(EntryB.getNumber());
733
2.40k
734
2.40k
  unsigned SaveN = SaveB.getNumber();
735
2.40k
  auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
736
2.40k
737
4.84k
  for (unsigned i = 0; 
i < Worklist.size()4.84k
;
++i2.43k
) {
738
2.43k
    unsigned BN = Worklist[i];
739
2.43k
    MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
740
2.43k
    for (auto &R : CSI)
741
126
      
if (126
!MBB.isLiveIn(R.getReg())126
)
742
5
        MBB.addLiveIn(R.getReg());
743
2.43k
    if (BN != SaveN)
744
31
      for (auto &SB : MBB.successors())
745
38
        Worklist.insert(SB->getNumber());
746
2.43k
  }
747
2.40k
}
748
749
bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
750
      MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
751
1.72k
      BitVector &Path) const {
752
1.72k
  assert(MBB.getNumber() >= 0);
753
1.72k
  unsigned BN = MBB.getNumber();
754
1.72k
  if (
Path[BN] || 1.72k
DoneF[BN]1.72k
)
755
0
    return false;
756
1.72k
  
if (1.72k
DoneT[BN]1.72k
)
757
7
    return true;
758
1.72k
759
1.72k
  auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
760
1.72k
761
1.72k
  Path[BN] = true;
762
1.72k
  bool ReachedExit = false;
763
1.72k
  for (auto &SB : MBB.successors())
764
21
    ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
765
1.72k
766
1.72k
  if (
!MBB.empty() && 1.72k
MBB.back().isReturn()1.71k
) {
767
1.69k
    // Add implicit uses of all callee-saved registers to the reached
768
1.69k
    // return instructions. This is to prevent the anti-dependency breaker
769
1.69k
    // from renaming these registers.
770
1.69k
    MachineInstr &RetI = MBB.back();
771
1.69k
    if (!isRestoreCall(RetI.getOpcode()))
772
1.69k
      for (auto &R : CSI)
773
47
        RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
774
1.69k
    ReachedExit = true;
775
1.69k
  }
776
1.72k
777
1.72k
  // We don't want to add unnecessary live-ins to the restore block: since
778
1.72k
  // the callee-saved registers are being defined in it, the entry of the
779
1.72k
  // restore block cannot be on the path from the definitions to any exit.
780
1.72k
  if (
ReachedExit && 1.72k
&MBB != &RestoreB1.70k
) {
781
12
    for (auto &R : CSI)
782
1
      
if (1
!MBB.isLiveIn(R.getReg())1
)
783
1
        MBB.addLiveIn(R.getReg());
784
12
    DoneT[BN] = true;
785
12
  }
786
1.72k
  if (!ReachedExit)
787
11
    DoneF[BN] = true;
788
1.72k
789
1.72k
  Path[BN] = false;
790
1.72k
  return ReachedExit;
791
1.72k
}
792
793
/// Find the position in \p B at which CFI instructions should be inserted,
/// or None if the block contains no allocframe.
///
/// The CFI instructions need to be inserted right after allocframe. An
/// exception to this is a situation where allocframe is bundled with a
/// call: then the CFI instructions need to be inserted before the packet
/// with the allocframe+call (in case the call throws an exception).
static Optional<MachineBasicBlock::iterator>
findCFILocation(MachineBasicBlock &B) {
  const auto InstrEnd = B.instr_end();

  for (MachineBasicBlock::iterator Pos = B.begin(), BlockEnd = B.end();
       Pos != BlockEnd; ++Pos) {
    MachineInstr &Inst = *Pos;

    if (!Inst.isBundle()) {
      // Stand-alone instruction: only an allocframe is of interest.
      if (Inst.getOpcode() == Hexagon::S2_allocframe)
        return std::next(Pos);
      continue;
    }

    // Inst is a bundle header: scan the instructions bundled after it.
    bool SawAllocFrame = false;
    bool SawCall = false;
    for (auto Member = std::next(Pos.getInstrIterator());
         Member != InstrEnd && Member->isBundled(); ++Member) {
      if (Member->getOpcode() == Hexagon::S2_allocframe)
        SawAllocFrame = true;
      else if (Member->isCall())
        SawCall = true;
    }

    if (!SawAllocFrame)
      continue;
    // With a call in the packet, insert before the packet; otherwise after.
    if (SawCall)
      return Pos;
    return std::next(Pos);
  }
  return None;
}
823
824
1.65k
/// For every block of \p MF in which findCFILocation reports an insertion
/// point, emit the CFI instructions at that point.
void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
  for (MachineBasicBlock &Block : MF) {
    Optional<MachineBasicBlock::iterator> Where = findCFILocation(Block);
    if (!Where.hasValue())
      continue;
    insertCFIInstructionsAt(Block, Where.getValue());
  }
}
831
832
void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
833
1.49k
      MachineBasicBlock::iterator At) const {
834
1.49k
  MachineFunction &MF = *MBB.getParent();
835
1.49k
  MachineFrameInfo &MFI = MF.getFrameInfo();
836
1.49k
  MachineModuleInfo &MMI = MF.getMMI();
837
1.49k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
838
1.49k
  auto &HII = *HST.getInstrInfo();
839
1.49k
  auto &HRI = *HST.getRegisterInfo();
840
1.49k
841
1.49k
  // If CFI instructions have debug information attached, something goes
842
1.49k
  // wrong with the final assembly generation: the prolog_end is placed
843
1.49k
  // in a wrong location.
844
1.49k
  DebugLoc DL;
845
1.49k
  const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
846
1.49k
847
1.49k
  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
848
1.49k
  bool HasFP = hasFP(MF);
849
1.49k
850
1.49k
  if (
HasFP1.49k
) {
851
1.49k
    unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
852
1.49k
    unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
853
1.49k
854
1.49k
    // Define CFA via an offset from the value of FP.
855
1.49k
    //
856
1.49k
    //  -8   -4    0 (SP)
857
1.49k
    // --+----+----+---------------------
858
1.49k
    //   | FP | LR |          increasing addresses -->
859
1.49k
    // --+----+----+---------------------
860
1.49k
    //   |         +-- Old SP (before allocframe)
861
1.49k
    //   +-- New FP (after allocframe)
862
1.49k
    //
863
1.49k
    // MCCFIInstruction::createDefCfa subtracts the offset from the register.
864
1.49k
    // MCCFIInstruction::createOffset takes the offset without sign change.
865
1.49k
    auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
866
1.49k
    BuildMI(MBB, At, DL, CFID)
867
1.49k
        .addCFIIndex(MF.addFrameInst(DefCfa));
868
1.49k
    // R31 (return addr) = CFA - 4
869
1.49k
    auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
870
1.49k
    BuildMI(MBB, At, DL, CFID)
871
1.49k
        .addCFIIndex(MF.addFrameInst(OffR31));
872
1.49k
    // R30 (frame ptr) = CFA - 8
873
1.49k
    auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
874
1.49k
    BuildMI(MBB, At, DL, CFID)
875
1.49k
        .addCFIIndex(MF.addFrameInst(OffR30));
876
1.49k
  }
877
1.49k
878
1.49k
  static unsigned int RegsToMove[] = {
879
1.49k
    Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
880
1.49k
    Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
881
1.49k
    Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
882
1.49k
    Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
883
1.49k
    Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
884
1.49k
    Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
885
1.49k
    Hexagon::NoRegister
886
1.49k
  };
887
1.49k
888
1.49k
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
889
1.49k
890
37.4k
  for (unsigned i = 0; 
RegsToMove[i] != Hexagon::NoRegister37.4k
;
++i35.9k
) {
891
35.9k
    unsigned Reg = RegsToMove[i];
892
509
    auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
893
509
      return C.getReg() == Reg;
894
509
    };
895
35.9k
    auto F = find_if(CSI, IfR);
896
35.9k
    if (F == CSI.end())
897
35.9k
      continue;
898
22
899
22
    int64_t Offset;
900
22
    if (
HasFP22
) {
901
22
      // If the function has a frame pointer (i.e. has an allocframe),
902
22
      // then the CFA has been defined in terms of FP. Any offsets in
903
22
      // the following CFI instructions have to be defined relative
904
22
      // to FP, which points to the bottom of the stack frame.
905
22
      // The function getFrameIndexReference can still choose to use SP
906
22
      // for the offset calculation, so we cannot simply call it here.
907
22
      // Instead, get the offset (relative to the FP) directly.
908
22
      Offset = MFI.getObjectOffset(F->getFrameIdx());
909
22
    } else {
910
0
      unsigned FrameReg;
911
0
      Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
912
0
    }
913
22
    // Subtract 8 to make room for R30 and R31, which are added above.
914
22
    Offset -= 8;
915
22
916
22
    if (
Reg < Hexagon::D0 || 22
Reg > Hexagon::D1522
) {
917
0
      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
918
0
      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
919
0
                                                   Offset);
920
0
      BuildMI(MBB, At, DL, CFID)
921
0
          .addCFIIndex(MF.addFrameInst(OffReg));
922
22
    } else {
923
22
      // Split the double regs into subregs, and generate appropriate
924
22
      // cfi_offsets.
925
22
      // The only reason, we are split double regs is, llvm-mc does not
926
22
      // understand paired registers for cfi_offset.
927
22
      // Eg .cfi_offset r1:0, -64
928
22
929
22
      unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
930
22
      unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
931
22
      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
932
22
      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
933
22
      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
934
22
                                                  Offset+4);
935
22
      BuildMI(MBB, At, DL, CFID)
936
22
          .addCFIIndex(MF.addFrameInst(OffHi));
937
22
      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
938
22
                                                  Offset);
939
22
      BuildMI(MBB, At, DL, CFID)
940
22
          .addCFIIndex(MF.addFrameInst(OffLo));
941
22
    }
942
35.9k
  }
943
1.49k
}
944
945
15.4k
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
946
15.4k
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
947
0
    return false;
948
15.4k
949
15.4k
  auto &MFI = MF.getFrameInfo();
950
15.4k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
951
15.4k
  bool HasExtraAlign = HRI.needsStackRealignment(MF);
952
15.4k
  bool HasAlloca = MFI.hasVarSizedObjects();
953
15.4k
954
15.4k
  // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
955
15.4k
  // that this shouldn't be required, but doing so now because gcc does and
956
15.4k
  // gdb can't break at the start of the function without it.  Will remove if
957
15.4k
  // this turns out to be a gdb bug.
958
15.4k
  //
959
15.4k
  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
960
10.8k
    return true;
961
4.61k
962
4.61k
  // By default we want to use SP (since it's always there). FP requires
963
4.61k
  // some setup (i.e. ALLOCFRAME).
964
4.61k
  // Both, alloca and stack alignment modify the stack pointer by an
965
4.61k
  // undetermined value, so we need to save it at the entry to the function
966
4.61k
  // (i.e. use allocframe).
967
4.61k
  
if (4.61k
HasAlloca || 4.61k
HasExtraAlign4.60k
)
968
1.14k
    return true;
969
3.47k
970
3.47k
  
if (3.47k
MFI.getStackSize() > 03.47k
) {
971
1.09k
    // If FP-elimination is disabled, we have to use FP at this point.
972
1.09k
    const TargetMachine &TM = MF.getTarget();
973
1.09k
    if (
TM.Options.DisableFramePointerElim(MF) || 1.09k
!EliminateFramePointer803
)
974
289
      return true;
975
803
    
if (803
EnableStackOVFSanitizer803
)
976
0
      return true;
977
3.18k
  }
978
3.18k
979
3.18k
  const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
980
3.18k
  if (
MFI.hasCalls() || 3.18k
HMFI.hasClobberLR()2.45k
)
981
731
    return true;
982
2.45k
983
2.45k
  return false;
984
2.45k
}
985
986
/// Selects which family of runtime save/restore routines
/// getSpillFunctionFor picks a name from.
enum SpillKind {
  /// Save routines ("__save_r16_through_rN").
  SK_ToMem,
  /// Restore routines ("__restore_r16_through_rN_and_deallocframe").
  SK_FromMem,
  /// Restore routines used before a tail call
  /// ("__restore_r16_through_rN_and_deallocframe_before_tailcall").
  SK_FromMemTailcall
};
991
992
static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
993
13
      bool Stkchk = false) {
994
13
  const char * V4SpillToMemoryFunctions[] = {
995
13
    "__save_r16_through_r17",
996
13
    "__save_r16_through_r19",
997
13
    "__save_r16_through_r21",
998
13
    "__save_r16_through_r23",
999
13
    "__save_r16_through_r25",
1000
13
    "__save_r16_through_r27" };
1001
13
1002
13
  const char * V4SpillToMemoryStkchkFunctions[] = {
1003
13
    "__save_r16_through_r17_stkchk",
1004
13
    "__save_r16_through_r19_stkchk",
1005
13
    "__save_r16_through_r21_stkchk",
1006
13
    "__save_r16_through_r23_stkchk",
1007
13
    "__save_r16_through_r25_stkchk",
1008
13
    "__save_r16_through_r27_stkchk" };
1009
13
1010
13
  const char * V4SpillFromMemoryFunctions[] = {
1011
13
    "__restore_r16_through_r17_and_deallocframe",
1012
13
    "__restore_r16_through_r19_and_deallocframe",
1013
13
    "__restore_r16_through_r21_and_deallocframe",
1014
13
    "__restore_r16_through_r23_and_deallocframe",
1015
13
    "__restore_r16_through_r25_and_deallocframe",
1016
13
    "__restore_r16_through_r27_and_deallocframe" };
1017
13
1018
13
  const char * V4SpillFromMemoryTailcallFunctions[] = {
1019
13
    "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1020
13
    "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1021
13
    "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1022
13
    "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1023
13
    "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1024
13
    "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1025
13
  };
1026
13
1027
13
  const char **SpillFunc = nullptr;
1028
13
1029
13
  switch(SpillType) {
1030
5
  case SK_ToMem:
1031
1
    SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
1032
4
                       : V4SpillToMemoryFunctions;
1033
5
    break;
1034
7
  case SK_FromMem:
1035
7
    SpillFunc = V4SpillFromMemoryFunctions;
1036
7
    break;
1037
1
  case SK_FromMemTailcall:
1038
1
    SpillFunc = V4SpillFromMemoryTailcallFunctions;
1039
1
    break;
1040
13
  }
1041
13
  assert(SpillFunc && "Unknown spill kind");
1042
13
1043
13
  // Spill all callee-saved registers up to the highest register used.
1044
13
  switch (MaxReg) {
1045
3
  case Hexagon::R17:
1046
3
    return SpillFunc[0];
1047
4
  case Hexagon::R19:
1048
4
    return SpillFunc[1];
1049
0
  case Hexagon::R21:
1050
0
    return SpillFunc[2];
1051
0
  case Hexagon::R23:
1052
0
    return SpillFunc[3];
1053
0
  case Hexagon::R25:
1054
0
    return SpillFunc[4];
1055
6
  case Hexagon::R27:
1056
6
    return SpillFunc[5];
1057
0
  default:
1058
0
    llvm_unreachable("Unhandled maximum callee save register");
1059
0
  }
1060
0
  return nullptr;
1061
0
}
1062
1063
int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1064
4.98k
      int FI, unsigned &FrameReg) const {
1065
4.98k
  auto &MFI = MF.getFrameInfo();
1066
4.98k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1067
4.98k
1068
4.98k
  int Offset = MFI.getObjectOffset(FI);
1069
4.98k
  bool HasAlloca = MFI.hasVarSizedObjects();
1070
4.98k
  bool HasExtraAlign = HRI.needsStackRealignment(MF);
1071
4.98k
  bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
1072
4.98k
1073
4.98k
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1074
4.98k
  unsigned FrameSize = MFI.getStackSize();
1075
4.98k
  unsigned SP = HRI.getStackRegister();
1076
4.98k
  unsigned FP = HRI.getFrameRegister();
1077
4.98k
  unsigned AP = HMFI.getStackAlignBasePhysReg();
1078
4.98k
  // It may happen that AP will be absent even HasAlloca && HasExtraAlign
1079
4.98k
  // is true. HasExtraAlign may be set because of vector spills, without
1080
4.98k
  // aligned locals or aligned outgoing function arguments. Since vector
1081
4.98k
  // spills will ultimately be "unaligned", it is safe to use FP as the
1082
4.98k
  // base register.
1083
4.98k
  // In fact, in such a scenario the stack is actually not required to be
1084
4.98k
  // aligned, although it may end up being aligned anyway, since this
1085
4.98k
  // particular case is not easily detectable. The alignment will be
1086
4.98k
  // unnecessary, but not incorrect.
1087
4.98k
  // Unfortunately there is no quick way to verify that the above is
1088
4.98k
  // indeed the case (and that it's not a result of an error), so just
1089
4.98k
  // assume that missing AP will be replaced by FP.
1090
4.98k
  // (A better fix would be to rematerialize AP from FP and always align
1091
4.98k
  // vector spills.)
1092
4.98k
  if (AP == 0)
1093
4.98k
    AP = FP;
1094
4.98k
1095
4.98k
  bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
1096
4.98k
  // Use FP at -O0, except when there are objects with extra alignment.
1097
4.98k
  // That additional alignment requirement may cause a pad to be inserted,
1098
4.98k
  // which will make it impossible to use FP to access objects located
1099
4.98k
  // past the pad.
1100
4.98k
  if (
NoOpt && 4.98k
!HasExtraAlign3.19k
)
1101
3.16k
    UseFP = true;
1102
4.98k
  if (
MFI.isFixedObjectIndex(FI) || 4.98k
MFI.isObjectPreAllocated(FI)4.78k
) {
1103
211
    // Fixed and preallocated objects will be located before any padding
1104
211
    // so FP must be used to access them.
1105
205
    UseFP |= (HasAlloca || HasExtraAlign);
1106
4.98k
  } else {
1107
4.77k
    if (
HasAlloca4.77k
) {
1108
3
      if (HasExtraAlign)
1109
2
        UseAP = true;
1110
3
      else
1111
1
        UseFP = true;
1112
3
    }
1113
4.77k
  }
1114
4.98k
1115
4.98k
  // If FP was picked, then there had better be FP.
1116
4.98k
  bool HasFP = hasFP(MF);
1117
4.98k
  assert((HasFP || !UseFP) && "This function must have frame pointer");
1118
4.98k
1119
4.98k
  // Having FP implies allocframe. Allocframe will store extra 8 bytes:
1120
4.98k
  // FP/LR. If the base register is used to access an object across these
1121
4.98k
  // 8 bytes, then the offset will need to be adjusted by 8.
1122
4.98k
  //
1123
4.98k
  // After allocframe:
1124
4.98k
  //                    HexagonISelLowering adds 8 to ---+
1125
4.98k
  //                    the offsets of all stack-based   |
1126
4.98k
  //                    arguments (*)                    |
1127
4.98k
  //                                                     |
1128
4.98k
  //   getObjectOffset < 0   0     8  getObjectOffset >= 8
1129
4.98k
  // ------------------------+-----+------------------------> increasing
1130
4.98k
  //     <local objects>     |FP/LR|    <input arguments>     addresses
1131
4.98k
  // -----------------+------+-----+------------------------>
1132
4.98k
  //                  |      |
1133
4.98k
  //    SP/AP point --+      +-- FP points here (**)
1134
4.98k
  //    somewhere on
1135
4.98k
  //    this side of FP/LR
1136
4.98k
  //
1137
4.98k
  // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1138
4.98k
  // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1139
4.98k
1140
4.98k
  // The lowering assumes that FP/LR is present, and so the offsets of
1141
4.98k
  // the formal arguments start at 8. If FP/LR is not there we need to
1142
4.98k
  // reduce the offset by 8.
1143
4.98k
  if (
Offset > 0 && 4.98k
!HasFP8
)
1144
8
    Offset -= 8;
1145
4.98k
1146
4.98k
  if (UseFP)
1147
3.18k
    FrameReg = FP;
1148
1.80k
  else 
if (1.80k
UseAP1.80k
)
1149
2
    FrameReg = AP;
1150
1.80k
  else
1151
1.80k
    FrameReg = SP;
1152
4.98k
1153
4.98k
  // Calculate the actual offset in the instruction. If there is no FP
1154
4.98k
  // (in other words, no allocframe), then SP will not be adjusted (i.e.
1155
4.98k
  // there will be no SP -= FrameSize), so the frame size should not be
1156
4.98k
  // added to the calculated offset.
1157
4.98k
  int RealOffset = Offset;
1158
4.98k
  if (
!UseFP && 4.98k
!UseAP1.80k
)
1159
1.80k
    RealOffset = FrameSize+Offset;
1160
4.98k
  return RealOffset;
1161
4.98k
}
1162
1163
bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1164
      const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1165
2.40k
      bool &PrologueStubs) const {
1166
2.40k
  if (CSI.empty())
1167
2.35k
    return true;
1168
54
1169
54
  MachineBasicBlock::iterator MI = MBB.begin();
1170
54
  PrologueStubs = false;
1171
54
  MachineFunction &MF = *MBB.getParent();
1172
54
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1173
54
  auto &HII = *HST.getInstrInfo();
1174
54
1175
54
  if (
useSpillFunction(MF, CSI)54
) {
1176
5
    PrologueStubs = true;
1177
5
    unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1178
5
    bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1179
5
    const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1180
5
                                               StkOvrFlowEnabled);
1181
5
    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1182
5
    bool IsPIC = HTM.isPositionIndependent();
1183
5
    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1184
5
1185
5
    // Call spill function.
1186
5
    DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc()5
:
DebugLoc()0
;
1187
5
    unsigned SpillOpc;
1188
5
    if (
StkOvrFlowEnabled5
) {
1189
1
      if (LongCalls)
1190
0
        
SpillOpc = IsPIC ? 0
Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC0
1191
0
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
1192
1
      else
1193
1
        
SpillOpc = IsPIC ? 1
Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC0
1194
1
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1195
5
    } else {
1196
4
      if (LongCalls)
1197
0
        
SpillOpc = IsPIC ? 0
Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC0
1198
0
                         : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
1199
4
      else
1200
4
        
SpillOpc = IsPIC ? 4
Hexagon::SAVE_REGISTERS_CALL_V4_PIC3
1201
1
                         : Hexagon::SAVE_REGISTERS_CALL_V4;
1202
4
    }
1203
5
1204
5
    MachineInstr *SaveRegsCall =
1205
5
        BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1206
5
          .addExternalSymbol(SpillFun);
1207
5
1208
5
    // Add callee-saved registers as use.
1209
5
    addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1210
5
    // Add live in registers.
1211
27
    for (unsigned I = 0; 
I < CSI.size()27
;
++I22
)
1212
22
      MBB.addLiveIn(CSI[I].getReg());
1213
5
    return true;
1214
5
  }
1215
49
1216
147
  
for (unsigned i = 0, n = CSI.size(); 49
i < n147
;
++i98
) {
1217
98
    unsigned Reg = CSI[i].getReg();
1218
98
    // Add live in registers. We treat eh_return callee saved register r0 - r3
1219
98
    // specially. They are not really callee saved registers as they are not
1220
98
    // supposed to be killed.
1221
98
    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1222
98
    int FI = CSI[i].getFrameIdx();
1223
98
    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1224
98
    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
1225
98
    if (IsKill)
1226
96
      MBB.addLiveIn(Reg);
1227
98
  }
1228
2.40k
  return true;
1229
2.40k
}
1230
1231
/// Insert the callee-saved register restores before the first terminator of
/// \p MBB, either as one call/jump to a runtime restore routine or as
/// individual loads from the stack slots recorded in \p CSI.
///
/// \returns false if \p CSI is empty (nothing was inserted), true otherwise.
bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
  if (CSI.empty())
    return false;

  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
  MachineFunction &MF = *MBB.getParent();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();

  if (useRestoreFunction(MF, CSI)) {
    // A block without a return is handled like a tail-call block.
    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
    bool IsPIC = HTM.isPositionIndependent();
    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;

    // Take the debug location from the terminator if there is one, else
    // from the last non-debug instruction. getLastNonDebugInstr() yields
    // end() when the block has no such instruction, so it must be checked
    // before being dereferenced; in that case the location stays empty.
    DebugLoc DL;
    if (MI != MBB.end()) {
      DL = MI->getDebugLoc();
    } else {
      MachineBasicBlock::iterator LastI = MBB.getLastNonDebugInstr();
      if (LastI != MBB.end())
        DL = LastI->getDebugLoc();
    }
    MachineInstr *DeallocCall = nullptr;

    if (HasTC) {
      unsigned RetOpc;
      if (LongCalls)
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
      else
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
      DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
          .addExternalSymbol(RestoreFn);
    } else {
      // The block has a return, which must be its only terminator.
      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
      assert(It->isReturn() && std::next(It) == MBB.end());
      unsigned RetOpc;
      if (LongCalls)
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
      else
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
      DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
          .addExternalSymbol(RestoreFn);
      // Transfer the function live-out registers.
      DeallocCall->copyImplicitOps(MF, *It);
    }
    addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
    return true;
  }

  // No restore routine: reload each callee-saved register from its slot.
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    int FI = Info.getFrameIdx();
    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
  }

  return true;
}
1294
1295
/// Remove the call-frame pseudo instruction at \p I from \p MBB and return
/// the iterator produced by the erase. Only ADJCALLSTACKDOWN and
/// ADJCALLSTACKUP are expected here.
MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const unsigned Opcode = I->getOpcode();
  // Opcode is only read by the assertion below; the cast silences the
  // unused-variable warning when assertions are compiled out.
  (void)Opcode;
  assert((Opcode == Hexagon::ADJCALLSTACKDOWN ||
          Opcode == Hexagon::ADJCALLSTACKUP) &&
         "Cannot handle this call frame pseudo instruction");
  return MBB.erase(I);
}
1305
1306
void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
1307
2.40k
    MachineFunction &MF, RegScavenger *RS) const {
1308
2.40k
  // If this function has uses aligned stack and also has variable sized stack
1309
2.40k
  // objects, then we need to map all spill slots to fixed positions, so that
1310
2.40k
  // they can be accessed through FP. Otherwise they would have to be accessed
1311
2.40k
  // via AP, which may not be available at the particular place in the program.
1312
2.40k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1313
2.40k
  bool HasAlloca = MFI.hasVarSizedObjects();
1314
2.40k
  bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());
1315
2.40k
1316
2.40k
  if (
!HasAlloca || 2.40k
!NeedsAlign3
)
1317
2.40k
    return;
1318
2
1319
2
  unsigned LFS = MFI.getLocalFrameSize();
1320
11
  for (int i = 0, e = MFI.getObjectIndexEnd(); 
i != e11
;
++i9
) {
1321
9
    if (
!MFI.isSpillSlotObjectIndex(i) || 9
MFI.isDeadObjectIndex(i)5
)
1322
4
      continue;
1323
5
    unsigned S = MFI.getObjectSize(i);
1324
5
    // Reduce the alignment to at most 8. This will require unaligned vector
1325
5
    // stores if they happen here.
1326
5
    unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
1327
5
    MFI.setObjectAlignment(i, 8);
1328
5
    LFS = alignTo(LFS+S, A);
1329
5
    MFI.mapLocalFrameObject(i, -LFS);
1330
5
  }
1331
2
1332
2
  MFI.setLocalFrameSize(LFS);
1333
2
  unsigned A = MFI.getLocalFrameMaxAlign();
1334
2
  assert(A <= 8 && "Unexpected local frame alignment");
1335
2
  if (A == 0)
1336
2
    MFI.setLocalFrameMaxAlign(8);
1337
2
  MFI.setUseLocalStackAllocationBlock(true);
1338
2
1339
2
  // Set the physical aligned-stack base address register.
1340
2
  unsigned AP = 0;
1341
2
  if (const MachineInstr *AI = getAlignaInstr(MF))
1342
1
    AP = AI->getOperand(0).getReg();
1343
2.40k
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1344
2.40k
  HMFI.setStackAlignBasePhysReg(AP);
1345
2.40k
}
1346
1347
/// Returns true if there are no caller-saved registers available in class RC.
1348
static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1349
15
      const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1350
15
  MachineRegisterInfo &MRI = MF.getRegInfo();
1351
15
1352
273
  auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool {
1353
281
    for (MCRegAliasIterator AI(Reg, &HRI, true); 
AI.isValid()281
;
++AI8
)
1354
278
      
if (278
MRI.isPhysRegUsed(*AI)278
)
1355
270
        return true;
1356
3
    return false;
1357
273
  };
1358
15
1359
15
  // Check for an unused caller-saved register. Callee-saved registers
1360
15
  // have become pristine by now.
1361
285
  for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); 
*P285
;
++P270
)
1362
273
    
if (273
!IsUsed(*P)273
)
1363
3
      return false;
1364
15
1365
15
  // All caller-saved registers are used.
1366
12
  return true;
1367
15
}
1368
1369
#ifndef NDEBUG
1370
static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
1371
  dbgs() << '{';
1372
  for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1373
    unsigned R = x;
1374
    dbgs() << ' ' << PrintReg(R, &TRI);
1375
  }
1376
  dbgs() << " }";
1377
}
1378
#endif
1379
1380
/// Assigns stack slots to the callee-saved registers listed in CSI.
///
/// The incoming CSI entries are first reduced to a set of maximal registers
/// (steps 1-5 below). CSI is then cleared and rebuilt:
///  - registers that appear in the table returned by
///    getCalleeSavedSpillSlots get a fixed spill object at the table offset;
///  - every remaining register gets a fixed spill object placed below the
///    lowest offset assigned so far.
///
/// \param MF  Function being processed; its MachineFrameInfo receives the
///            new stack objects.
/// \param TRI Register info used for sub-/super-register iteration, reserved
///            registers, and spill size/alignment queries.
/// \param CSI In: callee-saved registers to place. Out: rebuilt list of
///            (register, frame index) pairs.
/// \returns Always true.
bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
1381
2.40k
      const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
1382
2.40k
  DEBUG(dbgs() << __func__ << " on "
1383
2.40k
               << MF.getFunction()->getName() << '\n');
1384
2.40k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1385
2.40k
  BitVector SRegs(Hexagon::NUM_TARGET_REGS);
1386
2.40k
1387
2.40k
  // Generate a set of unique, callee-saved registers (SRegs), where each
1388
2.40k
  // register in the set is maximal in terms of sub-/super-register relation,
1389
2.40k
  // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
1390
2.40k
1391
2.40k
  // (1) For each callee-saved register, add that register and all of its
1392
2.40k
  // sub-registers to SRegs.
1393
2.40k
  DEBUG(dbgs() << "Initial CS registers: {");
1394
2.62k
  for (unsigned i = 0, n = CSI.size(); 
i < n2.62k
;
++i212
) {
1395
212
    unsigned R = CSI[i].getReg();
1396
212
    DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
1397
424
    for (MCSubRegIterator SR(R, TRI, true); 
SR.isValid()424
;
++SR212
)
1398
212
      SRegs[*SR] = true;
1399
212
  }
1400
2.40k
  DEBUG(dbgs() << " }\n");
1401
2.40k
  DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1402
2.40k
1403
2.40k
  // (2) For each reserved register, remove that register and all of its
1404
2.40k
  // sub- and super-registers from SRegs.
  // NOTE(review): the loop below only walks MCSuperRegIterator; no
  // sub-register iterator is used here, so the "sub-" part of the comment
  // above is not visibly implemented in this loop -- confirm intent.
1405
2.40k
  BitVector Reserved = TRI->getReservedRegs(MF);
1406
89.0k
  for (int x = Reserved.find_first(); 
x >= 089.0k
;
x = Reserved.find_next(x)86.6k
) {
1407
86.6k
    unsigned R = x;
1408
228k
    for (MCSuperRegIterator SR(R, TRI, true); 
SR.isValid()228k
;
++SR142k
)
1409
142k
      SRegs[*SR] = false;
1410
86.6k
  }
1411
2.40k
  DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI); dbgs() << "\n");
1412
2.40k
  DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1413
2.40k
1414
2.40k
  // (3) Collect all registers that have at least one sub-register in SRegs,
1415
2.40k
  // and also have no sub-registers that are reserved. These will be the can-
1416
2.40k
  // didates for saving as a whole instead of their individual sub-registers.
1417
2.40k
  // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
1418
2.40k
  BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
1419
2.62k
  for (int x = SRegs.find_first(); 
x >= 02.62k
;
x = SRegs.find_next(x)212
) {
1420
212
    unsigned R = x;
1421
424
    for (MCSuperRegIterator SR(R, TRI); 
SR.isValid()424
;
++SR212
)
1422
212
      TmpSup[*SR] = true;
1423
212
  }
1424
2.52k
  // Drop a candidate as soon as one register visited by its sub-register
  // iterator is found in Reserved.
  for (int x = TmpSup.find_first(); 
x >= 02.52k
;
x = TmpSup.find_next(x)120
) {
1425
120
    unsigned R = x;
1426
480
    for (MCSubRegIterator SR(R, TRI, true); 
SR.isValid()480
;
++SR360
) {
1427
360
      if (!Reserved[*SR])
1428
360
        continue;
1429
0
      TmpSup[R] = false;
1430
0
      break;
1431
0
    }
1432
120
  }
1433
2.40k
  DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
1434
2.40k
1435
2.40k
  // (4) Include all super-registers found in (3) into SRegs.
1436
2.40k
  SRegs |= TmpSup;
1437
2.40k
  DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1438
2.40k
1439
2.40k
  // (5) For each register R in SRegs, if any super-register of R is in SRegs,
1440
2.40k
  // remove R from SRegs.
1441
2.74k
  for (int x = SRegs.find_first(); 
x >= 02.74k
;
x = SRegs.find_next(x)332
) {
1442
332
    unsigned R = x;
1443
332
    for (MCSuperRegIterator SR(R, TRI); 
SR.isValid()332
;
++SR0
) {
1444
212
      if (!SRegs[*SR])
1445
0
        continue;
1446
212
      SRegs[R] = false;
1447
212
      break;
1448
212
    }
1449
332
  }
1450
2.40k
  DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1451
2.40k
1452
2.40k
  // Now, for each register that has a fixed stack slot, create the stack
1453
2.40k
  // object for it.
  // The incoming CSI contents are discarded here; the list is rebuilt from
  // SRegs by the two loops that follow.
1454
2.40k
  CSI.clear();
1455
2.40k
1456
2.40k
  using SpillSlot = TargetFrameLowering::SpillSlot;
1457
2.40k
1458
2.40k
  unsigned NumFixed;
1459
2.40k
  int MinOffset = 0;  // CS offsets are negative.
1460
2.40k
  const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
1461
45.7k
  for (const SpillSlot *S = FixedSlots; 
S != FixedSlots+NumFixed45.7k
;
++S43.3k
) {
1462
43.3k
    if (!SRegs[S->Reg])
1463
43.2k
      continue;
1464
118
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
1465
118
    int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
1466
118
    // Track the lowest offset handed out so that non-fixed slots can be
    // placed below it.
    MinOffset = std::min(MinOffset, S->Offset);
1467
118
    CSI.push_back(CalleeSavedInfo(S->Reg, FI));
1468
118
    // Mark the register as handled.
    SRegs[S->Reg] = false;
1469
118
  }
1470
2.40k
1471
2.40k
  // There can be some registers that don't have fixed slots. For example,
1472
2.40k
  // we need to store R0-R3 in functions with exception handling. For each
1473
2.40k
  // such register, create a non-fixed stack object.
1474
2.41k
  for (int x = SRegs.find_first(); 
x >= 02.41k
;
x = SRegs.find_next(x)2
) {
1475
2
    unsigned R = x;
1476
2
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
1477
2
    unsigned Size = TRI->getSpillSize(*RC);
1478
2
    int Off = MinOffset - Size;
1479
2
    unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
1480
2
    assert(isPowerOf2_32(Align));
1481
2
    // Align is a power of two (asserted above), so masking with -Align
    // clears the low bits of Off.
    Off &= -Align;
1482
2
    int FI = MFI.CreateFixedSpillStackObject(Size, Off);
1483
2
    MinOffset = std::min(MinOffset, Off);
1484
2
    CSI.push_back(CalleeSavedInfo(R, FI));
1485
2
    SRegs[R] = false;
1486
2
  }
1487
2.40k
1488
2.40k
  // Debug output: one "<reg>:fi#<index>:sp<signed offset>" entry per CSI
  // element.
  DEBUG({
1489
2.40k
    dbgs() << "CS information: {";
1490
2.40k
    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1491
2.40k
      int FI = CSI[i].getFrameIdx();
1492
2.40k
      int Off = MFI.getObjectOffset(FI);
1493
2.40k
      dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
1494
2.40k
      if (Off >= 0)
1495
2.40k
        dbgs() << '+';
1496
2.40k
      dbgs() << Off;
1497
2.40k
    }
1498
2.40k
    dbgs() << " }\n";
1499
2.40k
  });
1500
2.40k
1501
#ifndef NDEBUG
1502
  // Verify that all registers were handled.
  // Any bit still set in SRegs at this point is printed and then reported
  // through llvm_unreachable.
1503
  bool MissedReg = false;
1504
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1505
    unsigned R = x;
1506
    dbgs() << PrintReg(R, TRI) << ' ';
1507
    MissedReg = true;
1508
  }
1509
  if (MissedReg)
1510
    llvm_unreachable("...there are unhandled callee-saved registers!");
1511
#endif
1512
1513
2.40k
  return true;
1514
2.40k
}
1515
1516
/// Rewrites a register copy whose destination (operand 0) and source
/// (operand 1) are both in ModRegsRegClass into two COPY instructions that
/// go through a newly created IntRegs virtual register.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Used to create the temporary virtual register.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Receives the temporary virtual register on success.
/// \returns false, leaving the instruction untouched, if either register is
///          not in ModRegsRegClass; true after the rewrite.
bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1517
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1518
3.55k
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1519
3.55k
  MachineInstr *MI = &*It;
1520
3.55k
  DebugLoc DL = MI->getDebugLoc();
1521
3.55k
  unsigned DstR = MI->getOperand(0).getReg();
1522
3.55k
  unsigned SrcR = MI->getOperand(1).getReg();
1523
3.55k
  // Only copies where both ends are in ModRegsRegClass are rewritten.
  if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1524
16
      !Hexagon::ModRegsRegClass.contains(SrcR))
1525
3.55k
    return false;
1526
0
1527
0
  // TmpR = COPY <original source operand>
  // DstR = COPY TmpR (killed)
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1528
0
  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
1529
0
  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1530
0
    .addReg(TmpR, RegState::Kill);
1531
0
1532
0
  NewRegs.push_back(TmpR);
1533
0
  B.erase(It);
1534
0
  return true;
1535
0
}
1536
1537
/// Expands a pseudo store of a non-integer register (operand 2) to a frame
/// index (operand 0). The value is first transferred into a new IntRegs
/// virtual register, which is then stored with S2_storeri_io at offset 0.
/// The transfer opcode is C2_tfrpr when the instruction is STriw_pred and
/// A2_tfrcrr for any other opcode.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Used to create the temporary virtual register.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Receives the temporary virtual register on success.
/// \returns false if operand 0 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1538
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1539
17
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1540
17
  MachineInstr *MI = &*It;
1541
17
  if (!MI->getOperand(0).isFI())
1542
0
    return false;
1543
17
1544
17
  DebugLoc DL = MI->getDebugLoc();
1545
17
  unsigned Opc = MI->getOpcode();
1546
17
  unsigned SrcR = MI->getOperand(2).getReg();
1547
17
  bool IsKill = MI->getOperand(2).isKill();
1548
17
  int FI = MI->getOperand(0).getIndex();
1549
17
1550
17
  // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1551
17
  // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1552
17
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1553
17
  unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1554
0
                                                 : Hexagon::A2_tfrcrr;
1555
17
  // The kill flag of the original source operand is carried over to the
  // transfer.
  BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1556
17
    .addReg(SrcR, getKillRegState(IsKill));
1557
17
1558
17
  // S2_storeri_io FI, 0, TmpR
1559
17
  // The memory operands of the original instruction are reused.
  BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1560
17
    .addFrameIndex(FI)
1561
17
    .addImm(0)
1562
17
    .addReg(TmpR, RegState::Kill)
1563
17
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1564
17
1565
17
  NewRegs.push_back(TmpR);
1566
17
  B.erase(It);
1567
17
  return true;
1568
17
}
1569
1570
/// Expands a pseudo load of a non-integer register (operand 0) from a frame
/// index (operand 1). The value is loaded into a new IntRegs virtual
/// register with L2_loadri_io at offset 0 and then transferred to the
/// destination. The transfer opcode is C2_tfrrp when the instruction is
/// LDriw_pred and A2_tfrrcr for any other opcode.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Used to create the temporary virtual register.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Receives the temporary virtual register on success.
/// \returns false if operand 1 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1571
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1572
17
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1573
17
  MachineInstr *MI = &*It;
1574
17
  if (!MI->getOperand(1).isFI())
1575
0
    return false;
1576
17
1577
17
  DebugLoc DL = MI->getDebugLoc();
1578
17
  unsigned Opc = MI->getOpcode();
1579
17
  unsigned DstR = MI->getOperand(0).getReg();
1580
17
  int FI = MI->getOperand(1).getIndex();
1581
17
1582
17
  // TmpR = L2_loadri_io FI, 0
1583
17
  // The memory operands of the original instruction are reused.
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1584
17
  BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1585
17
    .addFrameIndex(FI)
1586
17
    .addImm(0)
1587
17
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1588
17
1589
17
  // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1590
17
  // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1591
17
  unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1592
0
                                                 : Hexagon::A2_tfrrcr;
1593
17
  BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1594
17
    .addReg(TmpR, RegState::Kill);
1595
17
1596
17
  NewRegs.push_back(TmpR);
1597
17
  B.erase(It);
1598
17
  return true;
1599
17
}
1600
1601
/// Expands a store of a vector predicate register (operand 2) to a frame
/// index (operand 0). The predicate is converted into a new HvxVR virtual
/// register with V6_vandqrt, using an IntRegs virtual register loaded with
/// 0x01010101 as the second operand. That vector register is then stored via
/// storeRegToStackSlot, and the resulting store is expanded right away with
/// expandStoreVec.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Used to create the temporary virtual registers.
/// \param HII     Supplies descriptors and storeRegToStackSlot.
/// \param NewRegs Receives both temporary virtual registers on success.
/// \returns false if operand 0 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
1602
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1603
6
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1604
6
  MachineInstr *MI = &*It;
1605
6
  if (!MI->getOperand(0).isFI())
1606
0
    return false;
1607
6
1608
6
  DebugLoc DL = MI->getDebugLoc();
1609
6
  unsigned SrcR = MI->getOperand(2).getReg();
1610
6
  bool IsKill = MI->getOperand(2).isKill();
1611
6
  int FI = MI->getOperand(0).getIndex();
1612
6
  auto *RC = &Hexagon::HvxVRRegClass;
1613
6
1614
6
  // Insert transfer to general vector register.
1615
6
  //   TmpR0 = A2_tfrsi 0x01010101
1616
6
  //   TmpR1 = V6_vandqrt Qx, TmpR0
1617
6
  //   store FI, 0, TmpR1
1618
6
  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1619
6
  unsigned TmpR1 = MRI.createVirtualRegister(RC);
1620
6
1621
6
  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1622
6
    .addImm(0x01010101);
1623
6
1624
6
  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
1625
6
    .addReg(SrcR, getKillRegState(IsKill))
1626
6
    .addReg(TmpR0, RegState::Kill);
1627
6
1628
6
  auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1629
6
  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI);
1630
6
  // std::prev(It) is presumably the store just inserted by
  // storeRegToStackSlot -- TODO confirm; it is expanded immediately.
  expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
1631
6
1632
6
  NewRegs.push_back(TmpR0);
1633
6
  NewRegs.push_back(TmpR1);
1634
6
  B.erase(It);
1635
6
  return true;
1636
6
}
1637
1638
/// Expands a load of a vector predicate register (operand 0) from a frame
/// index (operand 1). A new HvxVR virtual register is loaded via
/// loadRegFromStackSlot (that load is expanded right away with
/// expandLoadVec), and the destination is then produced by V6_vandvrt from
/// the loaded vector and an IntRegs virtual register holding 0x01010101.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Used to create the temporary virtual registers.
/// \param HII     Supplies descriptors and loadRegFromStackSlot.
/// \param NewRegs Receives both temporary virtual registers on success.
/// \returns false if operand 1 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
1639
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1640
9
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1641
9
  MachineInstr *MI = &*It;
1642
9
  if (!MI->getOperand(1).isFI())
1643
0
    return false;
1644
9
1645
9
  DebugLoc DL = MI->getDebugLoc();
1646
9
  unsigned DstR = MI->getOperand(0).getReg();
1647
9
  int FI = MI->getOperand(1).getIndex();
1648
9
  auto *RC = &Hexagon::HvxVRRegClass;
1649
9
1650
9
  // TmpR0 = A2_tfrsi 0x01010101
1651
9
  // TmpR1 = load FI, 0
1652
9
  // DstR = V6_vandvrt TmpR1, TmpR0
1653
9
  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1654
9
  unsigned TmpR1 = MRI.createVirtualRegister(RC);
1655
9
1656
9
  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1657
9
    .addImm(0x01010101);
1658
9
  MachineFunction &MF = *B.getParent();
1659
9
  auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1660
9
  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI);
1661
9
  // std::prev(It) is presumably the load just inserted by
  // loadRegFromStackSlot -- TODO confirm; it is expanded immediately.
  expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
1662
9
1663
9
  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
1664
9
    .addReg(TmpR1, RegState::Kill)
1665
9
    .addReg(TmpR0, RegState::Kill);
1666
9
1667
9
  NewRegs.push_back(TmpR0);
1668
9
  NewRegs.push_back(TmpR1);
1669
9
  B.erase(It);
1670
9
  return true;
1671
9
}
1672
1673
/// Splits a store of a vector register pair (operand 2) to a frame index
/// (operand 0) into separate stores of its vsub_lo and vsub_hi
/// sub-registers. The low half goes to offset 0 and the high half to offset
/// Size, where Size is the spill size of HvxVRRegClass. A half is stored
/// only if it is in the live set computed by stepping LivePhysRegs from the
/// start of the block up to the instruction.
///
/// For each half, V6_vS32b_ai is selected when the required spill alignment
/// does not exceed the alignment available at that offset, and V6_vS32Ub_ai
/// otherwise.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Not referenced in this function.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Not referenced in this function.
/// \returns false if operand 0 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
1674
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1675
89
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1676
89
  MachineFunction &MF = *B.getParent();
1677
89
  auto &MFI = MF.getFrameInfo();
1678
89
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1679
89
  MachineInstr *MI = &*It;
1680
89
  if (!MI->getOperand(0).isFI())
1681
86
    return false;
1682
3
1683
3
  // It is possible that the double vector being stored is only partially
1684
3
  // defined. From the point of view of the liveness tracking, it is ok to
1685
3
  // store it as a whole, but if we break it up we may end up storing a
1686
3
  // register that is entirely undefined.
1687
3
  LivePhysRegs LPR(HRI);
1688
3
  LPR.addLiveIns(B);
1689
3
  SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
1690
22
  // Step the live set forward over every instruction that precedes It.
  for (auto R = B.begin(); 
R != It22
;
++R19
) {
1691
19
    Clobbers.clear();
1692
19
    LPR.stepForward(*R, Clobbers);
1693
19
    // Dead defs are recorded in Clobbers, but are not automatically removed
1694
19
    // from the live set.
1695
19
    for (auto &C : Clobbers)
1696
22
      
if (22
C.second->isReg() && 22
C.second->isDead()22
)
1697
4
        LPR.removeReg(C.first);
1698
19
  }
1699
3
1700
3
  DebugLoc DL = MI->getDebugLoc();
1701
3
  unsigned SrcR = MI->getOperand(2).getReg();
1702
3
  unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
1703
3
  unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
1704
3
  bool IsKill = MI->getOperand(2).isKill();
1705
3
  int FI = MI->getOperand(0).getIndex();
1706
3
1707
3
  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1708
3
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1709
3
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1710
3
  unsigned StoreOpc;
1711
3
1712
3
  // Store low part.
1713
3
  if (
LPR.contains(SrcLo)3
) {
1714
3
    StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1715
0
                                     : Hexagon::V6_vS32Ub_ai;
1716
3
    BuildMI(B, It, DL, HII.get(StoreOpc))
1717
3
      .addFrameIndex(FI)
1718
3
      .addImm(0)
1719
3
      .addReg(SrcLo, getKillRegState(IsKill))
1720
3
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1721
3
  }
1722
3
1723
3
  // Store high part.
1724
3
  if (
LPR.contains(SrcHi)3
) {
1725
2
    // The high half lives at offset Size, so the alignment available there
    // is MinAlign(HasAlign, Size).
    StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
1726
0
                                                     : Hexagon::V6_vS32Ub_ai;
1727
2
    BuildMI(B, It, DL, HII.get(StoreOpc))
1728
2
      .addFrameIndex(FI)
1729
2
      .addImm(Size)
1730
2
      .addReg(SrcHi, getKillRegState(IsKill))
1731
2
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1732
2
  }
1733
89
1734
89
  B.erase(It);
1735
89
  return true;
1736
89
}
1737
1738
/// Splits a load of a vector register pair (operand 0) from a frame index
/// (operand 1) into separate loads of its vsub_lo and vsub_hi
/// sub-registers. The low half is loaded from offset 0 and the high half
/// from offset Size, where Size is the spill size of HvxVRRegClass. Both
/// halves are always loaded.
///
/// For each half, V6_vL32b_ai is selected when the required spill alignment
/// does not exceed the alignment available at that offset, and V6_vL32Ub_ai
/// otherwise.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Not referenced in this function.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Not referenced in this function.
/// \returns false if operand 1 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
1739
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1740
94
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1741
94
  MachineFunction &MF = *B.getParent();
1742
94
  auto &MFI = MF.getFrameInfo();
1743
94
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1744
94
  MachineInstr *MI = &*It;
1745
94
  if (!MI->getOperand(1).isFI())
1746
92
    return false;
1747
2
1748
2
  DebugLoc DL = MI->getDebugLoc();
1749
2
  unsigned DstR = MI->getOperand(0).getReg();
1750
2
  unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
1751
2
  unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
1752
2
  int FI = MI->getOperand(1).getIndex();
1753
2
1754
2
  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1755
2
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1756
2
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1757
2
  unsigned LoadOpc;
1758
2
1759
2
  // Load low part.
1760
2
  LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1761
0
                                  : Hexagon::V6_vL32Ub_ai;
1762
2
  BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
1763
2
    .addFrameIndex(FI)
1764
2
    .addImm(0)
1765
2
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1766
2
1767
2
  // Load high part.
1768
2
  // The high half lives at offset Size, so the alignment available there is
  // MinAlign(HasAlign, Size).
  LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
1769
0
                                                  : Hexagon::V6_vL32Ub_ai;
1770
94
  BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
1771
94
    .addFrameIndex(FI)
1772
94
    .addImm(Size)
1773
94
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1774
94
1775
94
  B.erase(It);
1776
94
  return true;
1777
94
}
1778
1779
/// Replaces a store of a vector register (operand 2) to a frame index
/// (operand 0) with a concrete store at offset 0. V6_vS32b_ai is selected
/// when the spill alignment of HvxVRRegClass does not exceed the alignment
/// of the stack object, and V6_vS32Ub_ai otherwise. The kill flag and the
/// memory operands of the original instruction are carried over.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Not referenced in this function.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Not referenced in this function.
/// \returns false if operand 0 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1780
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1781
6
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1782
6
  MachineFunction &MF = *B.getParent();
1783
6
  auto &MFI = MF.getFrameInfo();
1784
6
  MachineInstr *MI = &*It;
1785
6
  if (!MI->getOperand(0).isFI())
1786
0
    return false;
1787
6
1788
6
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1789
6
  DebugLoc DL = MI->getDebugLoc();
1790
6
  unsigned SrcR = MI->getOperand(2).getReg();
1791
6
  bool IsKill = MI->getOperand(2).isKill();
1792
6
  int FI = MI->getOperand(0).getIndex();
1793
6
1794
6
  // Pick the store opcode from required vs. available alignment.
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1795
6
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1796
6
  unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1797
0
                                            : Hexagon::V6_vS32Ub_ai;
1798
6
  BuildMI(B, It, DL, HII.get(StoreOpc))
1799
6
    .addFrameIndex(FI)
1800
6
    .addImm(0)
1801
6
    .addReg(SrcR, getKillRegState(IsKill))
1802
6
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1803
6
1804
6
  B.erase(It);
1805
6
  return true;
1806
6
}
1807
1808
/// Replaces a load of a vector register (operand 0) from a frame index
/// (operand 1) with a concrete load at offset 0. V6_vL32b_ai is selected
/// when the spill alignment of HvxVRRegClass does not exceed the alignment
/// of the stack object, and V6_vL32Ub_ai otherwise. The memory operands of
/// the original instruction are carried over.
///
/// \param B       Block containing the instruction.
/// \param It      Iterator to the instruction; it is erased on success.
/// \param MRI     Not referenced in this function.
/// \param HII     Supplies the instruction descriptors.
/// \param NewRegs Not referenced in this function.
/// \returns false if operand 1 is not a frame index, true otherwise.
bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
1809
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1810
9
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1811
9
  MachineFunction &MF = *B.getParent();
1812
9
  auto &MFI = MF.getFrameInfo();
1813
9
  MachineInstr *MI = &*It;
1814
9
  if (!MI->getOperand(1).isFI())
1815
0
    return false;
1816
9
1817
9
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1818
9
  DebugLoc DL = MI->getDebugLoc();
1819
9
  unsigned DstR = MI->getOperand(0).getReg();
1820
9
  int FI = MI->getOperand(1).getIndex();
1821
9
1822
9
  // Pick the load opcode from required vs. available alignment.
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1823
9
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1824
9
  unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1825
0
                                           : Hexagon::V6_vL32Ub_ai;
1826
9
  BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
1827
9
    .addFrameIndex(FI)
1828
9
    .addImm(0)
1829
9
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1830
9
1831
9
  B.erase(It);
1832
9
  return true;
1833
9
}
1834
1835
/// Walks every instruction of every block in MF and dispatches on its
/// opcode to the matching expand* helper (copies, predicate/modifier
/// register spills, vector predicate spills, and vector pair spills).
/// Instructions with any other opcode are left alone.
///
/// \param MF      Function to process.
/// \param NewRegs Passed through to the helpers, which append the virtual
///                registers they create.
/// \returns true if at least one helper reported a change.
bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
1836
2.40k
      SmallVectorImpl<unsigned> &NewRegs) const {
1837
2.40k
  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
1838
2.40k
  MachineRegisterInfo &MRI = MF.getRegInfo();
1839
2.40k
  bool Changed = false;
1840
2.40k
1841
3.61k
  for (auto &B : MF) {
1842
3.61k
    // Traverse the basic block.
1843
3.61k
    MachineBasicBlock::iterator NextI;
1844
24.6k
    for (auto I = B.begin(), E = B.end(); 
I != E24.6k
;
I = NextI20.9k
) {
1845
20.9k
      MachineInstr *MI = &*I;
1846
20.9k
      // The successor is captured before dispatching because the helpers
      // erase the instruction at I when they expand it.
      NextI = std::next(I);
1847
20.9k
      unsigned Opc = MI->getOpcode();
1848
20.9k
1849
20.9k
      switch (Opc) {
1850
3.55k
        case TargetOpcode::COPY:
1851
3.55k
          Changed |= expandCopy(B, I, MRI, HII, NewRegs);
1852
3.55k
          break;
1853
17
        case Hexagon::STriw_pred:
1854
17
        case Hexagon::STriw_mod:
1855
17
          Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
1856
17
          break;
1857
17
        case Hexagon::LDriw_pred:
1858
17
        case Hexagon::LDriw_mod:
1859
17
          Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
1860
17
          break;
1861
6
        case Hexagon::PS_vstorerq_ai:
1862
6
          Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
1863
6
          break;
1864
9
        case Hexagon::PS_vloadrq_ai:
1865
9
          Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
1866
9
          break;
1867
94
        case Hexagon::PS_vloadrw_ai:
1868
94
        case Hexagon::PS_vloadrwu_ai:
1869
94
          Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
1870
94
          break;
1871
89
        case Hexagon::PS_vstorerw_ai:
1872
89
        case Hexagon::PS_vstorerwu_ai:
1873
89
          Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
1874
89
          break;
1875
20.9k
      }
1876
20.9k
    }
1877
3.61k
  }
1878
2.40k
1879
2.40k
  return Changed;
1880
2.40k
}
1881
1882
/// Hexagon-specific preparation that runs before the generic
/// TargetFrameLowering::determineCalleeSaves, which is called at the end.
///
/// In order, this function:
///  1. sizes SavedRegs to the number of registers and, if the function has
///     an EH return, marks every callee-saved register in it;
///  2. expands spill pseudo-instructions via expandSpillMacros and, when
///     OptimizeSpillSlots is set and the function is not opt-none, runs
///     optimizeSpillSlots;
///  3. if new virtual registers were created or mayOverflowFrameOffset
///     returns true, creates spill stack objects for the scavenger for each
///     relevant register class that has no free caller-saved register.
///
/// \param MF        Function being processed.
/// \param SavedRegs Bit vector of registers to save; resized here.
/// \param RS        Register scavenger that receives the new frame indices.
///                  It is dereferenced without a null check when slots are
///                  created.
void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
1883
                                                BitVector &SavedRegs,
1884
2.40k
                                                RegScavenger *RS) const {
1885
2.40k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1886
2.40k
1887
2.40k
  SavedRegs.resize(HRI.getNumRegs());
1888
2.40k
1889
2.40k
  // If we have a function containing __builtin_eh_return we want to spill and
1890
2.40k
  // restore all callee saved registers. Pretend that they are used.
1891
2.40k
  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
1892
17
    
for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); 1
*R17
;
++R16
)
1893
16
      SavedRegs.set(*R);
1894
2.40k
1895
2.40k
  // Replace predicate register pseudo spill code.
1896
2.40k
  SmallVector<unsigned,8> NewRegs;
1897
2.40k
  expandSpillMacros(MF, NewRegs);
1898
2.40k
  if (
OptimizeSpillSlots && 2.40k
!isOptNone(MF)2.40k
)
1899
860
    optimizeSpillSlots(MF, NewRegs);
1900
2.40k
1901
2.40k
  // We need to reserve a spill slot if scavenging could potentially require
1902
2.40k
  // spilling a scavenged register.
1903
2.40k
  if (
!NewRegs.empty() || 2.40k
mayOverflowFrameOffset(MF)2.39k
) {
1904
12
    MachineFrameInfo &MFI = MF.getFrameInfo();
1905
12
    MachineRegisterInfo &MRI = MF.getRegInfo();
1906
12
    SetVector<const TargetRegisterClass*> SpillRCs;
1907
12
    // Reserve an int register in any case, because it could be used to hold
1908
12
    // the stack offset in case it does not fit into a spill instruction.
1909
12
    SpillRCs.insert(&Hexagon::IntRegsRegClass);
1910
12
1911
12
    // Add the register class of every virtual register created above.
    for (unsigned VR : NewRegs)
1912
64
      SpillRCs.insert(MRI.getRegClass(VR));
1913
12
1914
15
    for (auto *RC : SpillRCs) {
1915
15
      // Classes that still have an unused caller-saved register get no
      // scavenging slots.
      if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
1916
3
        continue;
1917
12
      
unsigned Num = RC == &Hexagon::IntRegsRegClass ? 12
NumberScavengerSlots10
:
12
;
1918
12
      unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
1919
34
      // Create Num spill objects of this class and register each one with
      // the scavenger.
      for (unsigned i = 0; 
i < Num34
;
i++22
) {
1920
22
        int NewFI = MFI.CreateSpillStackObject(S, A);
1921
22
        RS->addScavengingFrameIndex(NewFI);
1922
22
      }
1923
15
    }
1924
12
  }
1925
2.40k
1926
2.40k
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1927
2.40k
}
1928
1929
/// Searches the raw allocation order of RC for a physical register that is
/// dead over the whole index range FIR. A register qualifies only if every
/// register produced for it by HexagonBlockRanges::expandToSubRegs has an
/// entry in DeadMap with at least one range that contains FIR.
///
/// \param MF       Function providing the subtarget and register info.
/// \param FIR      Index range that the register must be dead across.
/// \param IndexMap Not referenced in this function.
/// \param DeadMap  Map from register to its list of dead ranges.
/// \param RC       Register class whose allocation order is searched.
/// \returns The first qualifying register, or 0 if there is none.
unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
1930
      HexagonBlockRanges::IndexRange &FIR,
1931
      HexagonBlockRanges::InstrIndexMap &IndexMap,
1932
      HexagonBlockRanges::RegToRangeMap &DeadMap,
1933
84
      const TargetRegisterClass *RC) const {
1934
84
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1935
84
  auto &MRI = MF.getRegInfo();
1936
84
1937
2.26k
  // Predicate: true if DeadMap holds a range for Reg that contains FIR.
  // Registers with no DeadMap entry are treated as not dead.
  auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
1938
2.26k
    auto F = DeadMap.find({Reg,0});
1939
2.26k
    if (F == DeadMap.end())
1940
205
      return false;
1941
2.06k
    for (auto &DR : F->second)
1942
13.1k
      
if (13.1k
DR.contains(FIR)13.1k
)
1943
19
        return true;
1944
2.04k
    return false;
1945
2.04k
  };
1946
84
1947
2.26k
  for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
1948
2.26k
    bool Dead = true;
1949
2.26k
    // Reg is rejected as soon as one of its expanded registers is not dead.
    for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
1950
2.26k
      if (isDead(R.Reg))
1951
19
        continue;
1952
2.24k
      Dead = false;
1953
2.24k
      break;
1954
2.24k
    }
1955
2.26k
    if (Dead)
1956
16
      return Reg;
1957
68
  }
1958
68
  return 0;
1959
68
}
1960
1961
void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
1962
860
      SmallVectorImpl<unsigned> &VRegs) const {
1963
860
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1964
860
  auto &HII = *HST.getInstrInfo();
1965
860
  auto &HRI = *HST.getRegisterInfo();
1966
860
  auto &MRI = MF.getRegInfo();
1967
860
  HexagonBlockRanges HBR(MF);
1968
860
1969
860
  using BlockIndexMap =
1970
860
      std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
1971
860
  using BlockRangeMap =
1972
860
      std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
1973
860
  using IndexType = HexagonBlockRanges::IndexType;
1974
860
1975
860
  struct SlotInfo {
1976
860
    BlockRangeMap Map;
1977
860
    unsigned Size = 0;
1978
860
    const TargetRegisterClass *RC = nullptr;
1979
860
1980
282
    SlotInfo() = default;
1981
860
  };
1982
860
1983
860
  BlockIndexMap BlockIndexes;
1984
860
  SmallSet<int,4> BadFIs;
1985
860
  std::map<int,SlotInfo> FIRangeMap;
1986
860
1987
860
  // Accumulate register classes: get a common class for a pre-existing
1988
860
  // class HaveRC and a new class NewRC. Return nullptr if a common class
1989
860
  // cannot be found, otherwise return the resulting class. If HaveRC is
1990
860
  // nullptr, assume that it is still unset.
1991
860
  auto getCommonRC =
1992
860
      [](const TargetRegisterClass *HaveRC,
1993
471
         const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
1994
471
    if (
HaveRC == nullptr || 471
HaveRC == NewRC189
)
1995
468
      return NewRC;
1996
3
    // Different classes, both non-null. Pick the more general one.
1997
3
    
if (3
HaveRC->hasSubClassEq(NewRC)3
)
1998
0
      return HaveRC;
1999
3
    
if (3
NewRC->hasSubClassEq(HaveRC)3
)
2000
0
      return NewRC;
2001
3
    return nullptr;
2002
3
  };
2003
860
2004
860
  // Scan all blocks in the function. Check all occurrences of frame indexes,
2005
860
  // and collect relevant information.
2006
2.06k
  for (auto &B : MF) {
2007
2.06k
    std::map<int,IndexType> LastStore, LastLoad;
2008
2.06k
    // Emplace appears not to be supported in gcc 4.7.2-4.
2009
2.06k
    //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2010
2.06k
    auto P = BlockIndexes.insert(
2011
2.06k
                std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
2012
2.06k
    auto &IndexMap = P.first->second;
2013
2.06k
    DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n"
2014
2.06k
                 << IndexMap << '\n');
2015
2.06k
2016
11.3k
    for (auto &In : B) {
2017
11.3k
      int LFI, SFI;
2018
169
      bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
2019
302
      bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
2020
11.3k
      if (
Load && 11.3k
Store169
) {
2021
0
        // If it's both a load and a store, then we won't handle it.
2022
0
        BadFIs.insert(LFI);
2023
0
        BadFIs.insert(SFI);
2024
0
        continue;
2025
0
      }
2026
11.3k
      // Check for register classes of the register used as the source for
2027
11.3k
      // the store, and the register used as the destination for the load.
2028
11.3k
      // Also, only accept base+imm_offset addressing modes. Other addressing
2029
11.3k
      // modes can have side-effects (post-increments, etc.). For stack
2030
11.3k
      // slots they are very unlikely, so there is not much loss due to
2031
11.3k
      // this restriction.
2032
11.3k
      
if (11.3k
Load || 11.3k
Store11.1k
) {
2033
471
        int TFI = Load ? 
LFI169
:
SFI302
;
2034
471
        unsigned AM = HII.getAddrMode(In);
2035
471
        SlotInfo &SI = FIRangeMap[TFI];
2036
471
        bool Bad = (AM != HexagonII::BaseImmOffset);
2037
471
        if (
!Bad471
) {
2038
471
          // If the addressing mode is ok, check the register class.
2039
471
          unsigned OpNum = Load ? 
0169
:
2302
;
2040
471
          auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
2041
471
          RC = getCommonRC(SI.RC, RC);
2042
471
          if (RC == nullptr)
2043
3
            Bad = true;
2044
471
          else
2045
468
            SI.RC = RC;
2046
471
        }
2047
471
        if (
!Bad471
) {
2048
468
          // Check sizes.
2049
468
          unsigned S = HII.getMemAccessSize(In);
2050
468
          if (
SI.Size != 0 && 468
SI.Size != S186
)
2051
0
            Bad = true;
2052
468
          else
2053
468
            SI.Size = S;
2054
468
        }
2055
471
        if (
!Bad471
) {
2056
468
          for (auto *Mo : In.memoperands()) {
2057
468
            if (!Mo->isVolatile())
2058
428
              continue;
2059
40
            Bad = true;
2060
40
            break;
2061
40
          }
2062
468
        }
2063
471
        if (Bad)
2064
43
          BadFIs.insert(TFI);
2065
471
      }
2066
11.3k
2067
11.3k
      // Locate uses of frame indices.
2068
45.4k
      for (unsigned i = 0, n = In.getNumOperands(); 
i < n45.4k
;
++i34.1k
) {
2069
34.1k
        const MachineOperand &Op = In.getOperand(i);
2070
34.1k
        if (!Op.isFI())
2071
32.5k
          continue;
2072
1.65k
        int FI = Op.getIndex();
2073
1.65k
        // Make sure that the following operand is an immediate and that
2074
1.65k
        // it is 0. This is the offset in the stack object.
2075
1.65k
        if (
i+1 >= n || 1.65k
!In.getOperand(i+1).isImm()1.65k
||
2076
1.65k
            In.getOperand(i+1).getImm() != 0)
2077
1.04k
          BadFIs.insert(FI);
2078
1.65k
        if (BadFIs.count(FI))
2079
1.18k
          continue;
2080
470
2081
470
        IndexType Index = IndexMap.getIndex(&In);
2082
470
        if (
Load470
) {
2083
129
          if (LastStore[FI] == IndexType::None)
2084
38
            LastStore[FI] = IndexType::Entry;
2085
129
          LastLoad[FI] = Index;
2086
470
        } else 
if (341
Store341
) {
2087
267
          HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2088
267
          if (LastStore[FI] != IndexType::None)
2089
20
            RL.add(LastStore[FI], LastLoad[FI], false, false);
2090
247
          else 
if (247
LastLoad[FI] != IndexType::None247
)
2091
0
            RL.add(IndexType::Entry, LastLoad[FI], false, false);
2092
267
          LastLoad[FI] = IndexType::None;
2093
267
          LastStore[FI] = Index;
2094
341
        } else {
2095
74
          BadFIs.insert(FI);
2096
74
        }
2097
34.1k
      }
2098
11.3k
    }
2099
2.06k
2100
285
    for (auto &I : LastLoad) {
2101
285
      IndexType LL = I.second;
2102
285
      if (LL == IndexType::None)
2103
169
        continue;
2104
116
      auto &RL = FIRangeMap[I.first].Map[&B];
2105
116
      IndexType &LS = LastStore[I.first];
2106
116
      if (LS != IndexType::None)
2107
116
        RL.add(LS, LL, false, false);
2108
116
      else
2109
0
        RL.add(IndexType::Entry, LL, false, false);
2110
285
      LS = IndexType::None;
2111
285
    }
2112
285
    for (auto &I : LastStore) {
2113
285
      IndexType LS = I.second;
2114
285
      if (LS == IndexType::None)
2115
116
        continue;
2116
169
      auto &RL = FIRangeMap[I.first].Map[&B];
2117
169
      RL.add(LS, IndexType::None, false, false);
2118
169
    }
2119
2.06k
  }
2120
860
2121
860
  DEBUG({
2122
860
    for (auto &P : FIRangeMap) {
2123
860
      dbgs() << "fi#" << P.first;
2124
860
      if (BadFIs.count(P.first))
2125
860
        dbgs() << " (bad)";
2126
860
      dbgs() << "  RC: ";
2127
860
      if (P.second.RC != nullptr)
2128
860
        dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2129
860
      else
2130
860
        dbgs() << "<null>\n";
2131
860
      for (auto &R : P.second.Map)
2132
860
        dbgs() << "  BB#" << R.first->getNumber() << " { " << R.second << "}\n";
2133
860
    }
2134
860
  });
2135
860
2136
860
  // When a slot is loaded from in a block without being stored to in the
2137
860
  // same block, it is live-on-entry to this block. To avoid CFG analysis,
2138
860
  // consider this slot to be live-on-exit from all blocks.
2139
860
  SmallSet<int,4> LoxFIs;
2140
860
2141
860
  std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2142
860
2143
282
  for (auto &P : FIRangeMap) {
2144
282
    // P = pair(FI, map: BB->RangeList)
2145
282
    if (BadFIs.count(P.first))
2146
58
      continue;
2147
224
    
for (auto &B : MF) 224
{
2148
1.30k
      auto F = P.second.Map.find(&B);
2149
1.30k
      // F = pair(BB, RangeList)
2150
1.30k
      if (
F == P.second.Map.end() || 1.30k
F->second.empty()262
)
2151
1.04k
        continue;
2152
262
      HexagonBlockRanges::IndexRange &IR = F->second.front();
2153
262
      if (IR.start() == IndexType::Entry)
2154
35
        LoxFIs.insert(P.first);
2155
1.30k
      BlockFIMap[&B].push_back(P.first);
2156
1.30k
    }
2157
282
  }
2158
860
2159
860
  DEBUG({
2160
860
    dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2161
860
    for (auto &P : BlockFIMap) {
2162
860
      auto &FIs = P.second;
2163
860
      if (FIs.empty())
2164
860
        continue;
2165
860
      dbgs() << "  BB#" << P.first->getNumber() << ": {";
2166
860
      for (auto I : FIs) {
2167
860
        dbgs() << " fi#" << I;
2168
860
        if (LoxFIs.count(I))
2169
860
          dbgs() << '*';
2170
860
      }
2171
860
      dbgs() << " }\n";
2172
860
    }
2173
860
  });
2174
860
2175
#ifndef NDEBUG
2176
  bool HasOptLimit = SpillOptMax.getPosition();
2177
#endif
2178
2179
860
  // eliminate loads, when all loads eliminated, eliminate all stores.
2180
2.06k
  for (auto &B : MF) {
2181
2.06k
    auto F = BlockIndexes.find(&B);
2182
2.06k
    assert(F != BlockIndexes.end());
2183
2.06k
    HexagonBlockRanges::InstrIndexMap &IM = F->second;
2184
2.06k
    HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2185
2.06k
    HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2186
2.06k
    DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n"
2187
2.06k
                 << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2188
2.06k
2189
262
    for (auto FI : BlockFIMap[&B]) {
2190
262
      if (BadFIs.count(FI))
2191
0
        continue;
2192
262
      
DEBUG262
(dbgs() << "Working on fi#" << FI << '\n');
2193
262
      HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2194
282
      for (auto &Range : RL) {
2195
282
        DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2196
282
        if (!IndexType::isInstr(Range.start()) ||
2197
247
            !IndexType::isInstr(Range.end()))
2198
198
          continue;
2199
84
        MachineInstr &SI = *IM.getInstr(Range.start());
2200
84
        MachineInstr &EI = *IM.getInstr(Range.end());
2201
84
        assert(SI.mayStore() && "Unexpected start instruction");
2202
84
        assert(EI.mayLoad() && "Unexpected end instruction");
2203
84
        MachineOperand &SrcOp = SI.getOperand(2);
2204
84
2205
84
        HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2206
84
                                                  SrcOp.getSubReg() };
2207
84
        auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
2208
84
        // The this-> is needed to unconfuse MSVC.
2209
84
        unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2210
84
        DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
2211
84
        if (FoundR == 0)
2212
68
          continue;
2213
#ifndef NDEBUG
2214
        if (HasOptLimit) {
2215
          if (SpillOptCount >= SpillOptMax)
2216
            return;
2217
          SpillOptCount++;
2218
        }
2219
#endif
2220
2221
16
        // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2222
16
        MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2223
16
        MachineInstr *CopyIn = nullptr;
2224
16
        if (
SrcRR.Reg != FoundR || 16
SrcRR.Sub != 00
) {
2225
16
          const DebugLoc &DL = SI.getDebugLoc();
2226
16
          CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2227
16
                       .add(SrcOp);
2228
16
        }
2229
16
2230
16
        ++StartIt;
2231
16
        // Check if this is a last store and the FI is live-on-exit.
2232
16
        if (
LoxFIs.count(FI) && 16
(&Range == &RL.back())0
) {
2233
0
          // Update store's source register.
2234
0
          if (unsigned SR = SrcOp.getSubReg())
2235
0
            SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2236
0
          else
2237
0
            SrcOp.setReg(FoundR);
2238
0
          SrcOp.setSubReg(0);
2239
0
          // We are keeping this register live.
2240
0
          SrcOp.setIsKill(false);
2241
16
        } else {
2242
16
          B.erase(&SI);
2243
16
          IM.replaceInstr(&SI, CopyIn);
2244
16
        }
2245
16
2246
16
        auto EndIt = std::next(EI.getIterator());
2247
357
        for (auto It = StartIt; 
It != EndIt357
;
It = NextIt341
) {
2248
341
          MachineInstr &MI = *It;
2249
341
          NextIt = std::next(It);
2250
341
          int TFI;
2251
341
          if (
!HII.isLoadFromStackSlot(MI, TFI) || 341
TFI != FI38
)
2252
324
            continue;
2253
17
          unsigned DstR = MI.getOperand(0).getReg();
2254
17
          assert(MI.getOperand(0).getSubReg() == 0);
2255
17
          MachineInstr *CopyOut = nullptr;
2256
17
          if (
DstR != FoundR17
) {
2257
17
            DebugLoc DL = MI.getDebugLoc();
2258
17
            unsigned MemSize = HII.getMemAccessSize(MI);
2259
17
            assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2260
17
            unsigned CopyOpc = TargetOpcode::COPY;
2261
17
            if (HII.isSignExtendingLoad(MI))
2262
0
              
CopyOpc = (MemSize == 1) ? 0
Hexagon::A2_sxtb0
:
Hexagon::A2_sxth0
;
2263
17
            else 
if (17
HII.isZeroExtendingLoad(MI)17
)
2264
0
              
CopyOpc = (MemSize == 1) ? 0
Hexagon::A2_zxtb0
:
Hexagon::A2_zxth0
;
2265
17
            CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2266
17
                        .addReg(FoundR, getKillRegState(&MI == &EI));
2267
17
          }
2268
341
          IM.replaceInstr(&MI, CopyOut);
2269
341
          B.erase(It);
2270
341
        }
2271
16
2272
16
        // Update the dead map.
2273
16
        HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2274
16
        for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2275
16
          DM[RR].subtract(Range);
2276
282
      } // for Range in range list
2277
262
    }
2278
2.06k
  }
2279
860
}
2280
2281
void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2282
3
      const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
2283
3
  MachineBasicBlock &MB = *AI->getParent();
2284
3
  DebugLoc DL = AI->getDebugLoc();
2285
3
  unsigned A = AI->getOperand(2).getImm();
2286
3
2287
3
  // Have
2288
3
  //    Rd  = alloca Rs, #A
2289
3
  //
2290
3
  // If Rs and Rd are different registers, use this sequence:
2291
3
  //    Rd  = sub(r29, Rs)
2292
3
  //    r29 = sub(r29, Rs)
2293
3
  //    Rd  = and(Rd, #-A)    ; if necessary
2294
3
  //    r29 = and(r29, #-A)   ; if necessary
2295
3
  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2296
3
  // otherwise, do
2297
3
  //    Rd  = sub(r29, Rs)
2298
3
  //    Rd  = and(Rd, #-A)    ; if necessary
2299
3
  //    r29 = Rd
2300
3
  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2301
3
2302
3
  MachineOperand &RdOp = AI->getOperand(0);
2303
3
  MachineOperand &RsOp = AI->getOperand(1);
2304
3
  unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
2305
3
2306
3
  // Rd = sub(r29, Rs)
2307
3
  BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2308
3
      .addReg(SP)
2309
3
      .addReg(Rs);
2310
3
  if (
Rs != Rd3
) {
2311
1
    // r29 = sub(r29, Rs)
2312
1
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2313
1
        .addReg(SP)
2314
1
        .addReg(Rs);
2315
1
  }
2316
3
  if (
A > 83
) {
2317
0
    // Rd  = and(Rd, #-A)
2318
0
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2319
0
        .addReg(Rd)
2320
0
        .addImm(-int64_t(A));
2321
0
    if (Rs != Rd)
2322
0
      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2323
0
          .addReg(SP)
2324
0
          .addImm(-int64_t(A));
2325
0
  }
2326
3
  if (
Rs == Rd3
) {
2327
2
    // r29 = Rd
2328
2
    BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2329
2
        .addReg(Rd);
2330
2
  }
2331
3
  if (
CF > 03
) {
2332
1
    // Rd = add(Rd, #CF)
2333
1
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2334
1
        .addReg(Rd)
2335
1
        .addImm(CF);
2336
1
  }
2337
3
}
2338
2339
3.79k
bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2340
3.79k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
2341
3.79k
  if (!MFI.hasVarSizedObjects())
2342
3.78k
    return false;
2343
3
  unsigned MaxA = MFI.getMaxAlignment();
2344
3
  if (MaxA <= getStackAlignment())
2345
2
    return false;
2346
1
  return true;
2347
1
}
2348
2349
/// Scan \p MF block by block, in order, and return a pointer to the first
/// instruction whose opcode is Hexagon::PS_aligna. Returns nullptr when the
/// function contains no such instruction.
const MachineInstr *HexagonFrameLowering::getAlignaInstr(
      const MachineFunction &MF) const {
  for (const MachineBasicBlock &Block : MF) {
    for (const MachineInstr &Instr : Block) {
      if (Instr.getOpcode() != Hexagon::PS_aligna)
        continue;
      return &Instr;
    }
  }
  return nullptr;
}
2357
2358
/// Adds all callee-saved registers as implicit uses or defs to the
2359
/// instruction.
2360
void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2361
13
      const CSIVect &CSI, bool IsDef, bool IsKill) const {
2362
13
  // Add the callee-saved registers as implicit uses.
2363
13
  for (auto &R : CSI)
2364
47
    MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2365
13
}
2366
2367
/// Determine whether the callee-saved register saves and restores should
2368
/// be generated via inline code. If this function returns "true", inline
2369
/// code will be generated. If this function returns "false", additional
2370
/// checks are performed, which may still lead to the inline code.
2371
bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2372
107
      const CSIVect &CSI) const {
2373
107
  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2374
3
    return true;
2375
104
  
if (104
!hasFP(MF)104
)
2376
14
    return true;
2377
90
  
if (90
!isOptSize(MF) && 90
!isMinSize(MF)72
)
2378
66
    
if (66
MF.getTarget().getOptLevel() > CodeGenOpt::Default66
)
2379
0
      return true;
2380
90
2381
90
  // Check if CSI only has double registers, and if the registers form
2382
90
  // a contiguous block starting from D8.
2383
90
  BitVector Regs(Hexagon::NUM_TARGET_REGS);
2384
263
  for (unsigned i = 0, n = CSI.size(); 
i < n263
;
++i173
) {
2385
173
    unsigned R = CSI[i].getReg();
2386
173
    if (!Hexagon::DoubleRegsRegClass.contains(R))
2387
0
      return true;
2388
173
    Regs[R] = true;
2389
173
  }
2390
90
  int F = Regs.find_first();
2391
90
  if (F != Hexagon::D8)
2392
0
    return true;
2393
263
  
while (90
F >= 0263
) {
2394
173
    int N = Regs.find_next(F);
2395
173
    if (
N >= 0 && 173
N != F+183
)
2396
0
      return true;
2397
173
    F = N;
2398
173
  }
2399
90
2400
90
  return false;
2401
107
}
2402
2403
bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2404
54
      const CSIVect &CSI) const {
2405
54
  if (shouldInlineCSR(MF, CSI))
2406
8
    return false;
2407
46
  unsigned NumCSI = CSI.size();
2408
46
  if (NumCSI <= 1)
2409
28
    return false;
2410
18
2411
18
  
unsigned Threshold = isOptSize(MF) ? 18
SpillFuncThresholdOs5
2412
13
                                     : SpillFuncThreshold;
2413
54
  return Threshold < NumCSI;
2414
54
}
2415
2416
bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
2417
53
      const CSIVect &CSI) const {
2418
53
  if (shouldInlineCSR(MF, CSI))
2419
9
    return false;
2420
44
  // The restore functions do a bit more than just restoring registers.
2421
44
  // The non-returning versions will go back directly to the caller's
2422
44
  // caller, others will clean up the stack frame in preparation for
2423
44
  // a tail call. Using them can still save code size even if only one
2424
44
  // register is getting restores. Make the decision based on -Oz:
2425
44
  // using -Os will use inline restore for a single register.
2426
44
  
if (44
isMinSize(MF)44
)
2427
3
    return true;
2428
41
  unsigned NumCSI = CSI.size();
2429
41
  if (NumCSI <= 1)
2430
24
    return false;
2431
17
2432
17
  
unsigned Threshold = isOptSize(MF) ? 17
SpillFuncThresholdOs-15
2433
12
                                     : SpillFuncThreshold;
2434
53
  return Threshold < NumCSI;
2435
53
}
2436
2437
2.39k
/// Make a rough guess as to whether some load or store to a stack location
/// in \p MF could need an additional register to form its address.
///
/// Returns true when HVX operations are in use and the estimated stack size
/// is above 256, or when the function has a store-immediate instruction
/// whose operand 0 is a frame index and the estimated stack size, shifted
/// right by the smallest log2 access size seen, does not fit in 6 unsigned
/// bits. Returns false otherwise.
bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
  const unsigned EstimatedSize = MF.getFrameInfo().estimateStackSize(MF);
  const auto &Subtarget = MF.getSubtarget<HexagonSubtarget>();
  if (Subtarget.useHVXOps() && EstimatedSize > 256)
    return true;

  // Look for store-immediate instructions that access the stack. Their
  // offset field is not extendable, so once the stack size goes past the
  // offset limit (6 bits, shifted) such stores need a new base register.
  bool SeenStackStoreImm = false;
  // Smallest log2 of the access size over all store-immediates visited.
  // Note that it is updated for every store-immediate, not only for those
  // that have a frame-index operand.
  unsigned SmallestLog = ~0u;

  for (const MachineBasicBlock &Block : MF) {
    for (const MachineInstr &Instr : Block) {
      unsigned LogSize;
      switch (Instr.getOpcode()) {
        case Hexagon::S4_storeirit_io:
        case Hexagon::S4_storeirif_io:
        case Hexagon::S4_storeiri_io:
          LogSize = 2;
          break;
        case Hexagon::S4_storeirht_io:
        case Hexagon::S4_storeirhf_io:
        case Hexagon::S4_storeirh_io:
          LogSize = 1;
          break;
        case Hexagon::S4_storeirbt_io:
        case Hexagon::S4_storeirbf_io:
        case Hexagon::S4_storeirb_io:
          LogSize = 0;
          break;
        default:
          // Not a store-immediate: move on to the next instruction.
          continue;
      }
      if (Instr.getOperand(0).isFI())
        SeenStackStoreImm = true;
      if (LogSize < SmallestLog)
        SmallestLog = LogSize;
    }
  }

  // The shift is only evaluated when a store-immediate was seen, in which
  // case SmallestLog has been lowered to at most 2.
  return SeenStackStoreImm && !isUInt<6>(EstimatedSize >> SmallestLog);
}