Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//
8
//===----------------------------------------------------------------------===//
9
10
#include "HexagonFrameLowering.h"
11
#include "HexagonBlockRanges.h"
12
#include "HexagonInstrInfo.h"
13
#include "HexagonMachineFunctionInfo.h"
14
#include "HexagonRegisterInfo.h"
15
#include "HexagonSubtarget.h"
16
#include "HexagonTargetMachine.h"
17
#include "MCTargetDesc/HexagonBaseInfo.h"
18
#include "llvm/ADT/BitVector.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/None.h"
21
#include "llvm/ADT/Optional.h"
22
#include "llvm/ADT/PostOrderIterator.h"
23
#include "llvm/ADT/SetVector.h"
24
#include "llvm/ADT/SmallSet.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/CodeGen/LivePhysRegs.h"
27
#include "llvm/CodeGen/MachineBasicBlock.h"
28
#include "llvm/CodeGen/MachineDominators.h"
29
#include "llvm/CodeGen/MachineFrameInfo.h"
30
#include "llvm/CodeGen/MachineFunction.h"
31
#include "llvm/CodeGen/MachineFunctionPass.h"
32
#include "llvm/CodeGen/MachineInstr.h"
33
#include "llvm/CodeGen/MachineInstrBuilder.h"
34
#include "llvm/CodeGen/MachineMemOperand.h"
35
#include "llvm/CodeGen/MachineModuleInfo.h"
36
#include "llvm/CodeGen/MachineOperand.h"
37
#include "llvm/CodeGen/MachinePostDominators.h"
38
#include "llvm/CodeGen/MachineRegisterInfo.h"
39
#include "llvm/CodeGen/RegisterScavenging.h"
40
#include "llvm/CodeGen/TargetRegisterInfo.h"
41
#include "llvm/IR/Attributes.h"
42
#include "llvm/IR/DebugLoc.h"
43
#include "llvm/IR/Function.h"
44
#include "llvm/MC/MCDwarf.h"
45
#include "llvm/MC/MCRegisterInfo.h"
46
#include "llvm/Pass.h"
47
#include "llvm/Support/CodeGen.h"
48
#include "llvm/Support/CommandLine.h"
49
#include "llvm/Support/Compiler.h"
50
#include "llvm/Support/Debug.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/Support/MathExtras.h"
53
#include "llvm/Support/raw_ostream.h"
54
#include "llvm/Target/TargetMachine.h"
55
#include "llvm/Target/TargetOptions.h"
56
#include <algorithm>
57
#include <cassert>
58
#include <cstdint>
59
#include <iterator>
60
#include <limits>
61
#include <map>
62
#include <utility>
63
#include <vector>
64
65
#define DEBUG_TYPE "hexagon-pei"
66
67
// Hexagon stack frame layout as defined by the ABI:
68
//
69
//                                                       Incoming arguments
70
//                                                       passed via stack
71
//                                                                      |
72
//                                                                      |
73
//        SP during function's                 FP during function's     |
74
//    +-- runtime (top of stack)               runtime (bottom) --+     |
75
//    |                                                           |     |
76
// --++---------------------+------------------+-----------------++-+-------
77
//   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
78
//   |   called functions   |  local objects   |  local objects  |FP|
79
// --+----------------------+------------------+-----------------+--+-------
80
//    <-    size known    -> <- size unknown -> <- size known  ->
81
//
82
// Low address                                                 High address
83
//
84
// <--- stack growth
85
//
86
//
87
// - In any circumstances, the outgoing function arguments are always accessi-
88
//   ble using the SP, and the incoming arguments are accessible using the FP.
89
// - If the local objects are not aligned, they can always be accessed using
90
//   the FP.
91
// - If there are no variable-sized objects, the local objects can always be
92
//   accessed using the SP, regardless whether they are aligned or not. (The
93
//   alignment padding will be at the bottom of the stack (highest address),
94
//   and so the offset with respect to the SP will be known at the compile-
95
//   -time.)
96
//
97
// The only complication occurs if there are both, local aligned objects, and
98
// dynamically allocated (variable-sized) objects. The alignment pad will be
99
// placed between the FP and the local objects, thus preventing the use of the
100
// FP to access the local objects. At the same time, the variable-sized objects
101
// will be between the SP and the local objects, thus introducing an unknown
102
// distance from the SP to the locals.
103
//
104
// To avoid this problem, a new register is created that holds the aligned
105
// address of the bottom of the stack, referred in the sources as AP (aligned
106
// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
107
// that aligns AP to the required boundary (a maximum of the alignments of
108
// all stack objects, fixed- and variable-sized). All local objects[1] will
109
// then use AP as the base pointer.
110
// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
111
// their name from being allocated at fixed locations on the stack, relative
112
// to the FP. In the presence of dynamic allocation and local alignment, such
113
// objects can only be accessed through the FP.
114
//
115
// Illustration of the AP:
116
//                                                                FP --+
117
//                                                                     |
118
// ---------------+---------------------+-----+-----------------------++-+--
119
//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
120
//   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
121
// ---------------+---------------------+-----+-----------------+-----+--+--
122
//                                      |<-- Multiple of the -->|
123
//                                           stack alignment    +-- AP
124
//
125
// The AP is set up at the beginning of the function. Since it is not a dedi-
126
// cated (reserved) register, it needs to be kept live throughout the function
127
// to be available as the base register for local object accesses.
128
// Normally, an address of a stack objects is obtained by a pseudo-instruction
129
// PS_fi. To access local objects with the AP register present, a different
130
// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
131
// argument compared to PS_fi: the first input register is the AP register.
132
// This keeps the register live between its definition and its uses.
133
134
// The AP register is originally set up using pseudo-instruction PS_aligna:
135
//   AP = PS_aligna A
136
// where
137
//   A  - required stack alignment
138
// The alignment value must be the maximum of all alignments required by
139
// any stack object.
140
141
// The dynamic allocation uses a pseudo-instruction PS_alloca:
142
//   Rd = PS_alloca Rs, A
143
// where
144
//   Rd - address of the allocated space
145
//   Rs - minimum size (the actual allocated can be larger to accommodate
146
//        alignment)
147
//   A  - required alignment
148
149
using namespace llvm;
150
151
static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
152
    cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
153
154
static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
155
    cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
156
    cl::ZeroOrMore);
157
158
static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
159
    cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
160
    cl::init(6), cl::ZeroOrMore);
161
162
static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
163
    cl::Hidden, cl::desc("Specify Os spill func threshold"),
164
    cl::init(1), cl::ZeroOrMore);
165
166
static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
167
    cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
168
    cl::init(false), cl::ZeroOrMore);
169
170
static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
171
    cl::init(true), cl::Hidden, cl::ZeroOrMore,
172
    cl::desc("Enable stack frame shrink wrapping"));
173
174
static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
175
    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
176
    cl::desc("Max count of stack frame shrink-wraps"));
177
178
static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
179
    cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
180
    cl::init(false), cl::ZeroOrMore);
181
182
static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
183
    cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
184
185
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
186
    cl::init(true), cl::desc("Optimize spill slots"));
187
188
#ifndef NDEBUG
189
static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
190
    cl::init(std::numeric_limits<unsigned>::max()));
191
static unsigned SpillOptCount = 0;
192
#endif
193
194
namespace llvm {
195
196
  void initializeHexagonCallFrameInformationPass(PassRegistry&);
197
  FunctionPass *createHexagonCallFrameInformation();
198
199
} // end namespace llvm
200
201
namespace {
202
203
  class HexagonCallFrameInformation : public MachineFunctionPass {
204
  public:
205
    static char ID;
206
207
919
    HexagonCallFrameInformation() : MachineFunctionPass(ID) {
208
919
      PassRegistry &PR = *PassRegistry::getPassRegistry();
209
919
      initializeHexagonCallFrameInformationPass(PR);
210
919
    }
211
212
    bool runOnMachineFunction(MachineFunction &MF) override;
213
214
918
    MachineFunctionProperties getRequiredProperties() const override {
215
918
      return MachineFunctionProperties().set(
216
918
          MachineFunctionProperties::Property::NoVRegs);
217
918
    }
218
  };
219
220
  char HexagonCallFrameInformation::ID = 0;
221
222
} // end anonymous namespace
223
224
4.97k
bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
225
4.97k
  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
226
4.97k
  bool NeedCFI = MF.getMMI().hasDebugInfo() ||
227
4.97k
                 
MF.getFunction().needsUnwindTableEntry()4.96k
;
228
4.97k
229
4.97k
  if (!NeedCFI)
230
3.04k
    return false;
231
1.93k
  HFI.insertCFIInstructions(MF);
232
1.93k
  return true;
233
1.93k
}
234
235
INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
236
                "Hexagon call frame information", false, false)
237
238
919
FunctionPass *llvm::createHexagonCallFrameInformation() {
239
919
  return new HexagonCallFrameInformation();
240
919
}
241
242
/// Map a register pair Reg to the subregister that has the greater "number",
243
/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
244
static unsigned getMax32BitSubRegister(unsigned Reg,
245
                                       const TargetRegisterInfo &TRI,
246
81
                                       bool hireg = true) {
247
81
    if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
248
0
      return Reg;
249
81
250
81
    unsigned RegNo = 0;
251
243
    for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); 
++SubRegs162
) {
252
162
      if (hireg) {
253
162
        if (*SubRegs > RegNo)
254
162
          RegNo = *SubRegs;
255
162
      } else {
256
0
        if (!RegNo || *SubRegs < RegNo)
257
0
          RegNo = *SubRegs;
258
0
      }
259
162
    }
260
81
    return RegNo;
261
81
}
262
263
/// Returns the callee saved register with the largest id in the vector.
264
static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
265
25
                                     const TargetRegisterInfo &TRI) {
266
25
    static_assert(Hexagon::R1 > 0,
267
25
                  "Assume physical registers are encoded as positive integers");
268
25
    if (CSI.empty())
269
0
      return 0;
270
25
271
25
    unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
272
81
    for (unsigned I = 1, E = CSI.size(); I < E; 
++I56
) {
273
56
      unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
274
56
      if (Reg > Max)
275
56
        Max = Reg;
276
56
    }
277
25
    return Max;
278
25
}
279
280
/// Checks if the basic block contains any instruction that needs a stack
281
/// frame to be already in place.
282
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
283
5.11k
                            const HexagonRegisterInfo &HRI) {
284
16.1k
    for (auto &I : MBB) {
285
16.1k
      const MachineInstr *MI = &I;
286
16.1k
      if (MI->isCall())
287
160
        return true;
288
15.9k
      unsigned Opc = MI->getOpcode();
289
15.9k
      switch (Opc) {
290
15.9k
        case Hexagon::PS_alloca:
291
4
        case Hexagon::PS_aligna:
292
4
          return true;
293
15.9k
        default:
294
15.9k
          break;
295
15.9k
      }
296
15.9k
      // Check individual operands.
297
45.6k
      
for (const MachineOperand &MO : MI->operands())15.9k
{
298
45.6k
        // While the presence of a frame index does not prove that a stack
299
45.6k
        // frame will be required, all frame indexes should be within alloc-
300
45.6k
        // frame/deallocframe. Otherwise, the code that translates a frame
301
45.6k
        // index into an offset would have to be aware of the placement of
302
45.6k
        // the frame creation/destruction instructions.
303
45.6k
        if (MO.isFI())
304
262
          return true;
305
45.4k
        if (MO.isReg()) {
306
39.4k
          unsigned R = MO.getReg();
307
39.4k
          // Virtual registers will need scavenging, which then may require
308
39.4k
          // a stack slot.
309
39.4k
          if (TargetRegisterInfo::isVirtualRegister(R))
310
6
            return true;
311
90.2k
          
for (MCSubRegIterator S(R, &HRI, true); 39.4k
S.isValid();
++S50.7k
)
312
50.9k
            if (CSR[*S])
313
180
              return true;
314
39.4k
          
continue39.3k
;
315
5.91k
        }
316
5.91k
        if (MO.isRegMask()) {
317
0
          // A regmask would normally have all callee-saved registers marked
318
0
          // as preserved, so this check would not be needed, but in case of
319
0
          // ever having other regmasks (for other calling conventions),
320
0
          // make sure they would be processed correctly.
321
0
          const uint32_t *BM = MO.getRegMask();
322
0
          for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
323
0
            unsigned R = x;
324
0
            // If this regmask does not preserve a CSR, a frame will be needed.
325
0
            if (!(BM[R/32] & (1u << (R%32))))
326
0
              return true;
327
0
          }
328
0
        }
329
5.91k
      }
330
15.9k
    }
331
5.11k
    
return false4.50k
;
332
5.11k
}
333
334
  /// Returns true if MBB has a machine instruction that indicates a tail call
335
  /// in the block.
336
15
static bool hasTailCall(const MachineBasicBlock &MBB) {
337
15
    MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
338
15
    if (I == MBB.end())
339
0
      return false;
340
15
    unsigned RetOpc = I->getOpcode();
341
15
    return RetOpc == Hexagon::PS_tailcall_i || 
RetOpc == Hexagon::PS_tailcall_r14
;
342
15
}
343
344
/// Returns true if MBB contains an instruction that returns.
345
14
static bool hasReturn(const MachineBasicBlock &MBB) {
346
14
    for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; 
++I0
)
347
14
      if (I->isReturn())
348
14
        return true;
349
14
    
return false0
;
350
14
}
351
352
/// Returns the "return" instruction from this block, or nullptr if there
353
/// isn't any.
354
8.30k
static MachineInstr *getReturn(MachineBasicBlock &MBB) {
355
8.30k
    for (auto &I : MBB)
356
37.4k
      if (I.isReturn())
357
6.44k
        return &I;
358
8.30k
    
return nullptr1.86k
;
359
8.30k
}
360
361
4.92k
static bool isRestoreCall(unsigned Opc) {
362
4.92k
    switch (Opc) {
363
4.92k
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
364
14
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
365
14
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
366
14
      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
367
14
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
368
14
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
369
14
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
370
14
      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
371
14
        return true;
372
4.91k
    }
373
4.91k
    return false;
374
4.91k
}
375
376
4.98k
static inline bool isOptNone(const MachineFunction &MF) {
377
4.98k
    return MF.getFunction().hasOptNone() ||
378
4.98k
           
MF.getTarget().getOptLevel() == CodeGenOpt::None4.97k
;
379
4.98k
}
380
381
310
static inline bool isOptSize(const MachineFunction &MF) {
382
310
    const Function &F = MF.getFunction();
383
310
    return F.hasOptSize() && 
!F.hasMinSize()44
;
384
310
}
385
386
314
static inline bool isMinSize(const MachineFunction &MF) {
387
314
    return MF.getFunction().hasMinSize();
388
314
}
389
390
/// Implements shrink-wrapping of the stack frame. By default, stack frame
391
/// is created in the function entry block, and is cleaned up in every block
392
/// that returns. This function finds alternate blocks: one for the frame
393
/// setup (prolog) and one for the cleanup (epilog).
394
void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
395
4.96k
      MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
396
4.96k
  static unsigned ShrinkCounter = 0;
397
4.96k
398
4.96k
  if (ShrinkLimit.getPosition()) {
399
0
    if (ShrinkCounter >= ShrinkLimit)
400
0
      return;
401
0
    ShrinkCounter++;
402
0
  }
403
4.96k
404
4.96k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
405
4.96k
406
4.96k
  MachineDominatorTree MDT;
407
4.96k
  MDT.runOnMachineFunction(MF);
408
4.96k
  MachinePostDominatorTree MPT;
409
4.96k
  MPT.runOnMachineFunction(MF);
410
4.96k
411
4.96k
  using UnsignedMap = DenseMap<unsigned, unsigned>;
412
4.96k
  using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
413
4.96k
414
4.96k
  UnsignedMap RPO;
415
4.96k
  RPOTType RPOT(&MF);
416
4.96k
  unsigned RPON = 0;
417
12.1k
  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; 
++I7.20k
)
418
7.20k
    RPO[(*I)->getNumber()] = RPON++;
419
4.96k
420
4.96k
  // Don't process functions that have loops, at least for now. Placement
421
4.96k
  // of prolog and epilog must take loop structure into account. For simpli-
422
4.96k
  // city don't do it right now.
423
6.40k
  for (auto &I : MF) {
424
6.40k
    unsigned BN = RPO[I.getNumber()];
425
8.45k
    for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; 
++SI2.05k
) {
426
2.37k
      // If found a back-edge, return.
427
2.37k
      if (RPO[(*SI)->getNumber()] <= BN)
428
326
        return;
429
2.37k
    }
430
6.40k
  }
431
4.96k
432
4.96k
  // Collect the set of blocks that need a stack frame to execute. Scan
433
4.96k
  // each block for uses/defs of callee-saved registers, calls, etc.
434
4.96k
  SmallVector<MachineBasicBlock*,16> SFBlocks;
435
4.64k
  BitVector CSR(Hexagon::NUM_TARGET_REGS);
436
60.3k
  for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; 
++P55.7k
)
437
111k
    
for (MCSubRegIterator S(*P, &HRI, true); 55.7k
S.isValid();
++S55.7k
)
438
55.7k
      CSR[*S] = true;
439
4.64k
440
4.64k
  for (auto &I : MF)
441
5.11k
    if (needsStackFrame(I, CSR, HRI))
442
612
      SFBlocks.push_back(&I);
443
4.64k
444
4.64k
  LLVM_DEBUG({
445
4.64k
    dbgs() << "Blocks needing SF: {";
446
4.64k
    for (auto &B : SFBlocks)
447
4.64k
      dbgs() << " " << printMBBReference(*B);
448
4.64k
    dbgs() << " }\n";
449
4.64k
  });
450
4.64k
  // No frame needed?
451
4.64k
  if (SFBlocks.empty())
452
4.23k
    return;
453
406
454
406
  // Pick a common dominator and a common post-dominator.
455
406
  MachineBasicBlock *DomB = SFBlocks[0];
456
612
  for (unsigned i = 1, n = SFBlocks.size(); i < n; 
++i206
) {
457
206
    DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
458
206
    if (!DomB)
459
0
      break;
460
206
  }
461
406
  MachineBasicBlock *PDomB = SFBlocks[0];
462
518
  for (unsigned i = 1, n = SFBlocks.size(); i < n; 
++i112
) {
463
136
    PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
464
136
    if (!PDomB)
465
24
      break;
466
136
  }
467
406
  LLVM_DEBUG({
468
406
    dbgs() << "Computed dom block: ";
469
406
    if (DomB)
470
406
      dbgs() << printMBBReference(*DomB);
471
406
    else
472
406
      dbgs() << "<null>";
473
406
    dbgs() << ", computed pdom block: ";
474
406
    if (PDomB)
475
406
      dbgs() << printMBBReference(*PDomB);
476
406
    else
477
406
      dbgs() << "<null>";
478
406
    dbgs() << "\n";
479
406
  });
480
406
  if (!DomB || !PDomB)
481
24
    return;
482
382
483
382
  // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
484
382
  if (!MDT.dominates(DomB, PDomB)) {
485
1
    LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
486
1
    return;
487
1
  }
488
381
  if (!MPT.dominates(PDomB, DomB)) {
489
0
    LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
490
0
    return;
491
0
  }
492
381
493
381
  // Finally, everything seems right.
494
381
  PrologB = DomB;
495
381
  EpilogB = PDomB;
496
381
}
497
498
/// Perform most of the PEI work here:
499
/// - saving/restoring of the callee-saved registers,
500
/// - stack frame creation and destruction.
501
/// Normally, this work is distributed among various functions, but doing it
502
/// in one place allows shrink-wrapping of the stack frame.
503
void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
504
4.96k
                                        MachineBasicBlock &MBB) const {
505
4.96k
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
506
4.96k
507
4.96k
  MachineFrameInfo &MFI = MF.getFrameInfo();
508
4.96k
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
509
4.96k
510
4.96k
  MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
511
4.96k
  if (EnableShrinkWrapping)
512
4.96k
    findShrunkPrologEpilog(MF, PrologB, EpilogB);
513
4.96k
514
4.96k
  bool PrologueStubs = false;
515
4.96k
  insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
516
4.96k
  insertPrologueInBlock(*PrologB, PrologueStubs);
517
4.96k
  updateEntryPaths(MF, *PrologB);
518
4.96k
519
4.96k
  if (EpilogB) {
520
381
    insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
521
381
    insertEpilogueInBlock(*EpilogB);
522
4.58k
  } else {
523
4.58k
    for (auto &B : MF)
524
6.61k
      if (B.isReturnBlock())
525
4.57k
        insertCSRRestoresInBlock(B, CSI, HRI);
526
4.58k
527
4.58k
    for (auto &B : MF)
528
6.61k
      if (B.isReturnBlock())
529
4.57k
        insertEpilogueInBlock(B);
530
4.58k
531
6.61k
    for (auto &B : MF) {
532
6.61k
      if (B.empty())
533
207
        continue;
534
6.40k
      MachineInstr *RetI = getReturn(B);
535
6.40k
      if (!RetI || 
isRestoreCall(RetI->getOpcode())4.57k
)
536
1.83k
        continue;
537
4.56k
      for (auto &R : CSI)
538
131
        RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
539
4.56k
    }
540
4.58k
  }
541
4.96k
542
4.96k
  if (EpilogB) {
543
381
    // If there is an epilog block, it may not have a return instruction.
544
381
    // In such case, we need to add the callee-saved registers as live-ins
545
381
    // in all blocks on all paths from the epilog to any return block.
546
381
    unsigned MaxBN = MF.getNumBlockIDs();
547
381
    BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
548
381
    updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
549
381
  }
550
4.96k
}
551
552
/// Returns true if the target can safely skip saving callee-saved registers
553
/// for noreturn nounwind functions.
554
bool HexagonFrameLowering::enableCalleeSaveSkip(
555
14
    const MachineFunction &MF) const {
556
14
  const auto &F = MF.getFunction();
557
14
  assert(F.hasFnAttribute(Attribute::NoReturn) &&
558
14
         F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
559
14
         !F.getFunction().hasFnAttribute(Attribute::UWTable));
560
14
  (void)F;
561
14
562
14
  // No need to save callee saved registers if the function does not return.
563
14
  return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
564
14
}
565
566
// Helper function used to determine when to eliminate the stack frame for
567
// functions marked as noreturn and when the noreturn-stack-elim options are
568
// specified. When both these conditions are true, then a FP may not be needed
569
// if the function makes a call. It is very similar to enableCalleeSaveSkip,
570
// but it used to check if the allocframe can be eliminated as well.
571
1.93k
static bool enableAllocFrameElim(const MachineFunction &MF) {
572
1.93k
  const auto &F = MF.getFunction();
573
1.93k
  const auto &MFI = MF.getFrameInfo();
574
1.93k
  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
575
1.93k
  assert(!MFI.hasVarSizedObjects() &&
576
1.93k
         !HST.getRegisterInfo()->needsStackRealignment(MF));
577
1.93k
  return F.hasFnAttribute(Attribute::NoReturn) &&
578
1.93k
    
F.hasFnAttribute(Attribute::NoUnwind)61
&&
579
1.93k
    
!F.hasFnAttribute(Attribute::UWTable)43
&&
HST.noreturnStackElim()43
&&
580
1.93k
    
MFI.getStackSize() == 010
;
581
1.93k
}
582
583
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
584
4.96k
      bool PrologueStubs) const {
585
4.96k
  MachineFunction &MF = *MBB.getParent();
586
4.96k
  MachineFrameInfo &MFI = MF.getFrameInfo();
587
4.96k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
588
4.96k
  auto &HII = *HST.getInstrInfo();
589
4.96k
  auto &HRI = *HST.getRegisterInfo();
590
4.96k
591
4.96k
  unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
592
4.96k
593
4.96k
  // Calculate the total stack frame size.
594
4.96k
  // Get the number of bytes to allocate from the FrameInfo.
595
4.96k
  unsigned FrameSize = MFI.getStackSize();
596
4.96k
  // Round up the max call frame size to the max alignment on the stack.
597
4.96k
  unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
598
4.96k
  MFI.setMaxCallFrameSize(MaxCFA);
599
4.96k
600
4.96k
  FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
601
4.96k
  MFI.setStackSize(FrameSize);
602
4.96k
603
4.96k
  bool AlignStack = (MaxAlign > getStackAlignment());
604
4.96k
605
4.96k
  // Get the number of bytes to allocate from the FrameInfo.
606
4.96k
  unsigned NumBytes = MFI.getStackSize();
607
4.96k
  unsigned SP = HRI.getStackRegister();
608
4.96k
  unsigned MaxCF = MFI.getMaxCallFrameSize();
609
4.96k
  MachineBasicBlock::iterator InsertPt = MBB.begin();
610
4.96k
611
4.96k
  SmallVector<MachineInstr *, 4> AdjustRegs;
612
4.96k
  for (auto &MBB : MF)
613
7.20k
    for (auto &MI : MBB)
614
32.7k
      if (MI.getOpcode() == Hexagon::PS_alloca)
615
6
        AdjustRegs.push_back(&MI);
616
4.96k
617
4.96k
  for (auto MI : AdjustRegs) {
618
6
    assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
619
6
    expandAlloca(MI, HII, SP, MaxCF);
620
6
    MI->eraseFromParent();
621
6
  }
622
4.96k
623
4.96k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
624
4.96k
625
4.96k
  if (hasFP(MF)) {
626
1.90k
    insertAllocframe(MBB, InsertPt, NumBytes);
627
1.90k
    if (AlignStack) {
628
68
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
629
68
          .addReg(SP)
630
68
          .addImm(-int64_t(MaxAlign));
631
68
    }
632
1.90k
    // If the stack-checking is enabled, and we spilled the callee-saved
633
1.90k
    // registers inline (i.e. did not use a spill function), then call
634
1.90k
    // the stack checker directly.
635
1.90k
    if (EnableStackOVFSanitizer && 
!PrologueStubs2
)
636
1
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
637
1
             .addExternalSymbol("__runtime_stack_check");
638
3.06k
  } else if (NumBytes > 0) {
639
115
    assert(alignTo(NumBytes, 8) == NumBytes);
640
115
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
641
115
      .addReg(SP)
642
115
      .addImm(-int(NumBytes));
643
115
  }
644
4.96k
}
645
646
4.95k
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
647
4.95k
  MachineFunction &MF = *MBB.getParent();
648
4.95k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
649
4.95k
  auto &HII = *HST.getInstrInfo();
650
4.95k
  auto &HRI = *HST.getRegisterInfo();
651
4.95k
  unsigned SP = HRI.getStackRegister();
652
4.95k
653
4.95k
  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
654
4.95k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
655
4.95k
656
4.95k
  if (!hasFP(MF)) {
657
3.05k
    MachineFrameInfo &MFI = MF.getFrameInfo();
658
3.05k
    if (unsigned NumBytes = MFI.getStackSize()) {
659
114
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
660
114
        .addReg(SP)
661
114
        .addImm(NumBytes);
662
114
    }
663
3.05k
    return;
664
3.05k
  }
665
1.89k
666
1.89k
  MachineInstr *RetI = getReturn(MBB);
667
1.89k
  unsigned RetOpc = RetI ? 
RetI->getOpcode()1.87k
:
027
;
668
1.89k
669
1.89k
  // Handle EH_RETURN.
670
1.89k
  if (RetOpc == Hexagon::EH_RETURN_JMPR) {
671
1
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
672
1
        .addDef(Hexagon::D15)
673
1
        .addReg(Hexagon::R30);
674
1
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
675
1
        .addReg(SP)
676
1
        .addReg(Hexagon::R28);
677
1
    return;
678
1
  }
679
1.89k
680
1.89k
  // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
681
1.89k
  // frame instruction if we encounter it.
682
1.89k
  if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
683
1.89k
      
RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC1.89k
||
684
1.89k
      
RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT1.88k
||
685
1.89k
      
RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC1.88k
) {
686
14
    MachineBasicBlock::iterator It = RetI;
687
14
    ++It;
688
14
    // Delete all instructions after the RESTORE (except labels).
689
28
    while (It != MBB.end()) {
690
14
      if (!It->isLabel())
691
14
        It = MBB.erase(It);
692
0
      else
693
0
        ++It;
694
14
    }
695
14
    return;
696
14
  }
697
1.88k
698
1.88k
  // It is possible that the restoring code is a call to a library function.
699
1.88k
  // All of the restore* functions include "deallocframe", so we need to make
700
1.88k
  // sure that we don't add an extra one.
701
1.88k
  bool NeedsDeallocframe = true;
702
1.88k
  if (!MBB.empty() && InsertPt != MBB.begin()) {
703
1.86k
    MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
704
1.86k
    unsigned COpc = PrevIt->getOpcode();
705
1.86k
    if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
706
1.86k
        
COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC1.86k
||
707
1.86k
        
COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT1.86k
||
708
1.86k
        
COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC1.86k
||
709
1.86k
        
COpc == Hexagon::PS_call_nr1.86k
||
COpc == Hexagon::PS_callr_nr1.85k
)
710
18
      NeedsDeallocframe = false;
711
1.86k
  }
712
1.88k
713
1.88k
  if (!NeedsDeallocframe)
714
18
    return;
715
1.86k
  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
716
1.86k
  // otherwise just add deallocframe. The function could be returning via a
717
1.86k
  // tail call.
718
1.86k
  if (RetOpc != Hexagon::PS_jmpret || 
DisableDeallocRet1.84k
) {
719
19
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
720
19
      .addDef(Hexagon::D15)
721
19
      .addReg(Hexagon::R30);
722
19
    return;
723
19
  }
724
1.84k
  unsigned NewOpc = Hexagon::L4_return;
725
1.84k
  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
726
1.84k
      .addDef(Hexagon::D15)
727
1.84k
      .addReg(Hexagon::R30);
728
1.84k
  // Transfer the function live-out registers.
729
1.84k
  NewI->copyImplicitOps(MF, *RetI);
730
1.84k
  MBB.erase(RetI);
731
1.84k
}
732
733
void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
734
1.90k
      MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
735
1.90k
  MachineFunction &MF = *MBB.getParent();
736
1.90k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
737
1.90k
  auto &HII = *HST.getInstrInfo();
738
1.90k
  auto &HRI = *HST.getRegisterInfo();
739
1.90k
740
1.90k
  // Check for overflow.
741
1.90k
  // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
742
1.90k
  const unsigned int ALLOCFRAME_MAX = 16384;
743
1.90k
744
1.90k
  // Create a dummy memory operand to avoid allocframe from being treated as
745
1.90k
  // a volatile memory reference.
746
1.90k
  auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
747
1.90k
                                      MachineMemOperand::MOStore, 4, 4);
748
1.90k
749
1.90k
  DebugLoc dl = MBB.findDebugLoc(InsertPt);
750
1.90k
  unsigned SP = HRI.getStackRegister();
751
1.90k
752
1.90k
  if (NumBytes >= ALLOCFRAME_MAX) {
753
0
    // Emit allocframe(#0).
754
0
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
755
0
      .addDef(SP)
756
0
      .addReg(SP)
757
0
      .addImm(0)
758
0
      .addMemOperand(MMO);
759
0
760
0
    // Subtract the size from the stack pointer.
761
0
    unsigned SP = HRI.getStackRegister();
762
0
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
763
0
      .addReg(SP)
764
0
      .addImm(-int(NumBytes));
765
1.90k
  } else {
766
1.90k
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
767
1.90k
      .addDef(SP)
768
1.90k
      .addReg(SP)
769
1.90k
      .addImm(NumBytes)
770
1.90k
      .addMemOperand(MMO);
771
1.90k
  }
772
1.90k
}
773
774
// Mark all callee-saved registers as live-in in every block on any path from
// the function entry to the save block SaveB (inclusive). The prologue in
// SaveB defines these registers, so blocks before it must carry them as
// live-in for the liveness information to be consistent.
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
      MachineBasicBlock &SaveB) const {
  SetVector<unsigned> Worklist;

  MachineBasicBlock &EntryB = MF.front();
  Worklist.insert(EntryB.getNumber());

  unsigned SaveN = SaveB.getNumber();
  auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();

  // Note: the worklist grows while this loop runs, so the bound must be
  // re-evaluated on each iteration (do not hoist Worklist.size()).
  for (unsigned i = 0; i < Worklist.size(); ++i) {
    unsigned BN = Worklist[i];
    MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
    for (auto &R : CSI)
      if (!MBB.isLiveIn(R.getReg()))
        MBB.addLiveIn(R.getReg());
    // Do not propagate past the save block: paths beyond it are handled
    // by the prologue's own definitions.
    if (BN != SaveN)
      for (auto &SB : MBB.successors())
        Worklist.insert(SB->getNumber());
  }
}
795
796
// Recursive DFS from MBB towards function exits. Returns true if an exit
// (returning block) is reachable from MBB. On blocks that lie on a path from
// the restore block to an exit, adds callee-saved registers as live-ins, and
// attaches implicit uses of them to reached return instructions.
// DoneT/DoneF memoize blocks already proven to reach / not reach an exit;
// Path marks blocks on the current DFS stack to cut cycles.
bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
      MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
      BitVector &Path) const {
  assert(MBB.getNumber() >= 0);
  unsigned BN = MBB.getNumber();
  // Already on the current path (cycle), or known not to reach an exit.
  if (Path[BN] || DoneF[BN])
    return false;
  // Known to reach an exit.
  if (DoneT[BN])
    return true;

  auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();

  Path[BN] = true;
  bool ReachedExit = false;
  for (auto &SB : MBB.successors())
    ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);

  if (!MBB.empty() && MBB.back().isReturn()) {
    // Add implicit uses of all callee-saved registers to the reached
    // return instructions. This is to prevent the anti-dependency breaker
    // from renaming these registers.
    MachineInstr &RetI = MBB.back();
    if (!isRestoreCall(RetI.getOpcode()))
      for (auto &R : CSI)
        RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
    ReachedExit = true;
  }

  // We don't want to add unnecessary live-ins to the restore block: since
  // the callee-saved registers are being defined in it, the entry of the
  // restore block cannot be on the path from the definitions to any exit.
  if (ReachedExit && &MBB != &RestoreB) {
    for (auto &R : CSI)
      if (!MBB.isLiveIn(R.getReg()))
        MBB.addLiveIn(R.getReg());
    DoneT[BN] = true;
  }
  if (!ReachedExit)
    DoneF[BN] = true;

  // Pop this block off the DFS path before returning.
  Path[BN] = false;
  return ReachedExit;
}
839
840
// Find the position in block B where CFI instructions should be inserted,
// or None if B contains no allocframe (and therefore needs no CFI here).
static Optional<MachineBasicBlock::iterator>
findCFILocation(MachineBasicBlock &B) {
    // The CFI instructions need to be inserted right after allocframe.
    // An exception to this is a situation where allocframe is bundled
    // with a call: then the CFI instructions need to be inserted before
    // the packet with the allocframe+call (in case the call throws an
    // exception).
    auto End = B.instr_end();

    for (MachineInstr &I : B) {
      MachineBasicBlock::iterator It = I.getIterator();
      if (!I.isBundle()) {
        // Standalone (unbundled) allocframe: insert right after it.
        if (I.getOpcode() == Hexagon::S2_allocframe)
          return std::next(It);
        continue;
      }
      // I is a bundle.
      // Scan the bundled instructions (they follow the BUNDLE header and
      // are marked isBundled) for an allocframe and for a call.
      bool HasCall = false, HasAllocFrame = false;
      auto T = It.getInstrIterator();
      while (++T != End && T->isBundled()) {
        if (T->getOpcode() == Hexagon::S2_allocframe)
          HasAllocFrame = true;
        else if (T->isCall())
          HasCall = true;
      }
      // Before the packet if it also contains a call, after it otherwise.
      if (HasAllocFrame)
        return HasCall ? It : std::next(It);
    }
    return None;
}
870
871
1.93k
// Walk every block of the function and emit CFI directives wherever a
// suitable insertion point (an allocframe) is found.
void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
  for (MachineBasicBlock &MBB : MF) {
    if (auto Loc = findCFILocation(MBB))
      insertCFIInstructionsAt(MBB, *Loc);
  }
}
878
879
// Emit the CFI instructions describing the frame at position At in MBB:
// the CFA definition (relative to FP when the function has a frame pointer),
// the save locations of LR/FP, and the save locations of all callee-saved
// registers recorded in the frame info.
void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator At) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();
  auto &HRI = *HST.getRegisterInfo();

  // If CFI instructions have debug information attached, something goes
  // wrong with the final assembly generation: the prolog_end is placed
  // in a wrong location.
  DebugLoc DL;
  const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);

  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
  bool HasFP = hasFP(MF);

  if (HasFP) {
    unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
    unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);

    // Define CFA via an offset from the value of FP.
    //
    //  -8   -4    0 (SP)
    // --+----+----+---------------------
    //   | FP | LR |          increasing addresses -->
    // --+----+----+---------------------
    //   |         +-- Old SP (before allocframe)
    //   +-- New FP (after allocframe)
    //
    // MCCFIInstruction::createDefCfa subtracts the offset from the register.
    // MCCFIInstruction::createOffset takes the offset without sign change.
    auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(DefCfa));
    // R31 (return addr) = CFA - 4
    auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(OffR31));
    // R30 (frame ptr) = CFA - 8
    auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(OffR30));
  }

  // Registers (and register pairs) whose save locations may need to be
  // described; terminated by NoRegister.
  static unsigned int RegsToMove[] = {
    Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
    Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
    Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
    Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
    Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
    Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
    Hexagon::NoRegister
  };

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
    unsigned Reg = RegsToMove[i];
    // Only emit CFI for registers that are actually in the callee-saved set.
    auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
      return C.getReg() == Reg;
    };
    auto F = find_if(CSI, IfR);
    if (F == CSI.end())
      continue;

    int64_t Offset;
    if (HasFP) {
      // If the function has a frame pointer (i.e. has an allocframe),
      // then the CFA has been defined in terms of FP. Any offsets in
      // the following CFI instructions have to be defined relative
      // to FP, which points to the bottom of the stack frame.
      // The function getFrameIndexReference can still choose to use SP
      // for the offset calculation, so we cannot simply call it here.
      // Instead, get the offset (relative to the FP) directly.
      Offset = MFI.getObjectOffset(F->getFrameIdx());
    } else {
      unsigned FrameReg;
      Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
    }
    // Subtract 8 to make room for R30 and R31, which are added above.
    Offset -= 8;

    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
      // Single (32-bit) register: one cfi_offset.
      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
                                                   Offset);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffReg));
    } else {
      // Split the double regs into subregs, and generate appropriate
      // cfi_offsets.
      // The only reason, we are split double regs is, llvm-mc does not
      // understand paired registers for cfi_offset.
      // Eg .cfi_offset r1:0, -64

      unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
      unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
                                                  Offset+4);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffHi));
      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
                                                  Offset);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffLo));
    }
  }
}
991
992
20.4k
// Decide whether the function needs a frame pointer (i.e. an allocframe).
// The checks are ordered from cheapest/strongest to weakest; the order is
// significant because later checks assume the earlier conditions are false.
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
  // Naked functions get no prologue at all.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  auto &MFI = MF.getFrameInfo();
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
  bool HasExtraAlign = HRI.needsStackRealignment(MF);
  bool HasAlloca = MFI.hasVarSizedObjects();

  // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
  // that this shouldn't be required, but doing so now because gcc does and
  // gdb can't break at the start of the function without it.  Will remove if
  // this turns out to be a gdb bug.
  //
  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
    return true;

  // By default we want to use SP (since it's always there). FP requires
  // some setup (i.e. ALLOCFRAME).
  // Both, alloca and stack alignment modify the stack pointer by an
  // undetermined value, so we need to save it at the entry to the function
  // (i.e. use allocframe).
  if (HasAlloca || HasExtraAlign)
    return true;

  if (MFI.getStackSize() > 0) {
    // If FP-elimination is disabled, we have to use FP at this point.
    const TargetMachine &TM = MF.getTarget();
    if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
      return true;
    if (EnableStackOVFSanitizer)
      return true;
  }

  // Calls (unless allocframe elimination is allowed) and clobbered LR
  // also force an allocframe, since LR must be preserved.
  const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
    return true;

  return false;
}
1032
1033
// Kind of callee-saved-register spill handled by the runtime library
// save/restore routines (see getSpillFunctionFor).
enum SpillKind {
  SK_ToMem,           // Save registers to memory (prologue).
  SK_FromMem,         // Restore registers and deallocate the frame (epilogue).
  SK_FromMemTailcall  // Restore and deallocate before a tail call.
};
1038
1039
static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
1040
25
      bool Stkchk = false) {
1041
25
  const char * V4SpillToMemoryFunctions[] = {
1042
25
    "__save_r16_through_r17",
1043
25
    "__save_r16_through_r19",
1044
25
    "__save_r16_through_r21",
1045
25
    "__save_r16_through_r23",
1046
25
    "__save_r16_through_r25",
1047
25
    "__save_r16_through_r27" };
1048
25
1049
25
  const char * V4SpillToMemoryStkchkFunctions[] = {
1050
25
    "__save_r16_through_r17_stkchk",
1051
25
    "__save_r16_through_r19_stkchk",
1052
25
    "__save_r16_through_r21_stkchk",
1053
25
    "__save_r16_through_r23_stkchk",
1054
25
    "__save_r16_through_r25_stkchk",
1055
25
    "__save_r16_through_r27_stkchk" };
1056
25
1057
25
  const char * V4SpillFromMemoryFunctions[] = {
1058
25
    "__restore_r16_through_r17_and_deallocframe",
1059
25
    "__restore_r16_through_r19_and_deallocframe",
1060
25
    "__restore_r16_through_r21_and_deallocframe",
1061
25
    "__restore_r16_through_r23_and_deallocframe",
1062
25
    "__restore_r16_through_r25_and_deallocframe",
1063
25
    "__restore_r16_through_r27_and_deallocframe" };
1064
25
1065
25
  const char * V4SpillFromMemoryTailcallFunctions[] = {
1066
25
    "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1067
25
    "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1068
25
    "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1069
25
    "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1070
25
    "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1071
25
    "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1072
25
  };
1073
25
1074
25
  const char **SpillFunc = nullptr;
1075
25
1076
25
  switch(SpillType) {
1077
25
  case SK_ToMem:
1078
10
    SpillFunc = Stkchk ? 
V4SpillToMemoryStkchkFunctions1
1079
10
                       : 
V4SpillToMemoryFunctions9
;
1080
10
    break;
1081
25
  case SK_FromMem:
1082
14
    SpillFunc = V4SpillFromMemoryFunctions;
1083
14
    break;
1084
25
  case SK_FromMemTailcall:
1085
1
    SpillFunc = V4SpillFromMemoryTailcallFunctions;
1086
1
    break;
1087
25
  }
1088
25
  assert(SpillFunc && "Unknown spill kind");
1089
25
1090
25
  // Spill all callee-saved registers up to the highest register used.
1091
25
  switch (MaxReg) {
1092
25
  case Hexagon::R17:
1093
5
    return SpillFunc[0];
1094
25
  case Hexagon::R19:
1095
4
    return SpillFunc[1];
1096
25
  case Hexagon::R21:
1097
8
    return SpillFunc[2];
1098
25
  case Hexagon::R23:
1099
2
    return SpillFunc[3];
1100
25
  case Hexagon::R25:
1101
0
    return SpillFunc[4];
1102
25
  case Hexagon::R27:
1103
6
    return SpillFunc[5];
1104
25
  default:
1105
0
    llvm_unreachable("Unhandled maximum callee save register");
1106
0
  }
1107
0
  return nullptr;
1108
0
}
1109
1110
// Compute the offset of frame index FI from a base register, and report the
// chosen base register through FrameReg (SP by default, FP or the aligned
// base AP when alloca/realignment make SP-relative addressing impossible).
int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
      int FI, unsigned &FrameReg) const {
  auto &MFI = MF.getFrameInfo();
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();

  int Offset = MFI.getObjectOffset(FI);
  bool HasAlloca = MFI.hasVarSizedObjects();
  bool HasExtraAlign = HRI.needsStackRealignment(MF);
  bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;

  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  unsigned FrameSize = MFI.getStackSize();
  unsigned SP = HRI.getStackRegister();
  unsigned FP = HRI.getFrameRegister();
  unsigned AP = HMFI.getStackAlignBasePhysReg();
  // It may happen that AP will be absent even HasAlloca && HasExtraAlign
  // is true. HasExtraAlign may be set because of vector spills, without
  // aligned locals or aligned outgoing function arguments. Since vector
  // spills will ultimately be "unaligned", it is safe to use FP as the
  // base register.
  // In fact, in such a scenario the stack is actually not required to be
  // aligned, although it may end up being aligned anyway, since this
  // particular case is not easily detectable. The alignment will be
  // unnecessary, but not incorrect.
  // Unfortunately there is no quick way to verify that the above is
  // indeed the case (and that it's not a result of an error), so just
  // assume that missing AP will be replaced by FP.
  // (A better fix would be to rematerialize AP from FP and always align
  // vector spills.)
  if (AP == 0)
    AP = FP;

  bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
  // Use FP at -O0, except when there are objects with extra alignment.
  // That additional alignment requirement may cause a pad to be inserted,
  // which will make it impossible to use FP to access objects located
  // past the pad.
  if (NoOpt && !HasExtraAlign)
    UseFP = true;
  if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
    // Fixed and preallocated objects will be located before any padding
    // so FP must be used to access them.
    UseFP |= (HasAlloca || HasExtraAlign);
  } else {
    if (HasAlloca) {
      if (HasExtraAlign)
        UseAP = true;
      else
        UseFP = true;
    }
  }

  // If FP was picked, then there had better be FP.
  bool HasFP = hasFP(MF);
  assert((HasFP || !UseFP) && "This function must have frame pointer");

  // Having FP implies allocframe. Allocframe will store extra 8 bytes:
  // FP/LR. If the base register is used to access an object across these
  // 8 bytes, then the offset will need to be adjusted by 8.
  //
  // After allocframe:
  //                    HexagonISelLowering adds 8 to ---+
  //                    the offsets of all stack-based   |
  //                    arguments (*)                    |
  //                                                     |
  //   getObjectOffset < 0   0     8  getObjectOffset >= 8
  // ------------------------+-----+------------------------> increasing
  //     <local objects>     |FP/LR|    <input arguments>     addresses
  // -----------------+------+-----+------------------------>
  //                  |      |
  //    SP/AP point --+      +-- FP points here (**)
  //    somewhere on
  //    this side of FP/LR
  //
  // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
  // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.

  // The lowering assumes that FP/LR is present, and so the offsets of
  // the formal arguments start at 8. If FP/LR is not there we need to
  // reduce the offset by 8.
  if (Offset > 0 && !HasFP)
    Offset -= 8;

  if (UseFP)
    FrameReg = FP;
  else if (UseAP)
    FrameReg = AP;
  else
    FrameReg = SP;

  // Calculate the actual offset in the instruction. If there is no FP
  // (in other words, no allocframe), then SP will not be adjusted (i.e.
  // there will be no SP -= FrameSize), so the frame size should not be
  // added to the calculated offset.
  int RealOffset = Offset;
  if (!UseFP && !UseAP)
    RealOffset = FrameSize+Offset;
  return RealOffset;
}
1209
1210
// Insert spills of the callee-saved registers in CSI at the start of MBB.
// Either emits a single call to a runtime __save_* routine (setting
// PrologueStubs), or emits individual stores to the assigned stack slots.
// Always returns true.
bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
      const CSIVect &CSI, const HexagonRegisterInfo &HRI,
      bool &PrologueStubs) const {
  if (CSI.empty())
    return true;

  MachineBasicBlock::iterator MI = MBB.begin();
  PrologueStubs = false;
  MachineFunction &MF = *MBB.getParent();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();

  if (useSpillFunction(MF, CSI)) {
    PrologueStubs = true;
    unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
    bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
    const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
                                               StkOvrFlowEnabled);
    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
    bool IsPIC = HTM.isPositionIndependent();
    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;

    // Call spill function.
    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
    // Select the save-call pseudo matching stack-check / long-call / PIC.
    unsigned SpillOpc;
    if (StkOvrFlowEnabled) {
      if (LongCalls)
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
      else
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
    } else {
      if (LongCalls)
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
      else
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4;
    }

    MachineInstr *SaveRegsCall =
        BuildMI(MBB, MI, DL, HII.get(SpillOpc))
          .addExternalSymbol(SpillFun);

    // Add callee-saved registers as use.
    addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
    // Add live in registers.
    for (unsigned I = 0; I < CSI.size(); ++I)
      MBB.addLiveIn(CSI[I].getReg());
    return true;
  }

  // No spill function: store each callee-saved register individually.
  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
    unsigned Reg = CSI[i].getReg();
    // Add live in registers. We treat eh_return callee saved register r0 - r3
    // specially. They are not really callee saved registers as they are not
    // supposed to be killed.
    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
    int FI = CSI[i].getFrameIdx();
    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
    if (IsKill)
      MBB.addLiveIn(Reg);
  }
  return true;
}
1277
1278
// Insert restores of the callee-saved registers in CSI before the terminator
// of MBB. Either emits a single call to a runtime __restore_* routine (which
// also deallocates the frame and, for the non-tailcall variant, returns), or
// emits individual loads from the assigned stack slots. Returns false only
// when CSI is empty.
bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
  if (CSI.empty())
    return false;

  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
  MachineFunction &MF = *MBB.getParent();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();

  if (useRestoreFunction(MF, CSI)) {
    // A block without a return is treated like a tail call: the restore
    // routine must not perform the return itself.
    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
    bool IsPIC = HTM.isPositionIndependent();
    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;

    // Call spill function.
    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
                                  : MBB.findDebugLoc(MBB.end());
    MachineInstr *DeallocCall = nullptr;

    if (HasTC) {
      // Restore + deallocframe before the tail call; select the pseudo
      // matching long-call / PIC.
      unsigned RetOpc;
      if (LongCalls)
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
      else
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
      DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
          .addExternalSymbol(RestoreFn);
    } else {
      // The block has a return.
      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
      assert(It->isReturn() && std::next(It) == MBB.end());
      unsigned RetOpc;
      if (LongCalls)
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
      else
        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
      DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
          .addExternalSymbol(RestoreFn);
      // Transfer the function live-out registers.
      DeallocCall->copyImplicitOps(MF, *It);
    }
    addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
    return true;
  }

  // No restore function: load each callee-saved register individually.
  for (unsigned i = 0; i < CSI.size(); ++i) {
    unsigned Reg = CSI[i].getReg();
    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    int FI = CSI[i].getFrameIdx();
    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
  }

  return true;
}
1341
1342
// Remove the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos. On Hexagon the call
// frame is part of the fixed frame established by allocframe, so no actual
// stack adjustment is emitted — the pseudo is simply erased.
MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  unsigned PseudoOpc = I->getOpcode();
  (void)PseudoOpc; // Referenced only by the assertion below.
  assert((PseudoOpc == Hexagon::ADJCALLSTACKDOWN ||
          PseudoOpc == Hexagon::ADJCALLSTACKUP) &&
         "Cannot handle this call frame pseudo instruction");
  return MBB.erase(I);
}
1352
1353
// For functions that use both alloca and over-aligned stack objects, map all
// spill slots into the local frame block at fixed FP-relative positions (with
// alignment capped at 8), and record the aligned-stack base register.
void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // If this function has uses aligned stack and also has variable sized stack
  // objects, then we need to map all spill slots to fixed positions, so that
  // they can be accessed through FP. Otherwise they would have to be accessed
  // via AP, which may not be available at the particular place in the program.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool HasAlloca = MFI.hasVarSizedObjects();
  bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());

  if (!HasAlloca || !NeedsAlign)
    return;

  // Lay out each live spill slot below the previous one (negative offsets
  // grow the local frame downwards).
  unsigned LFS = MFI.getLocalFrameSize();
  for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
    if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
      continue;
    unsigned S = MFI.getObjectSize(i);
    // Reduce the alignment to at most 8. This will require unaligned vector
    // stores if they happen here.
    unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
    MFI.setObjectAlignment(i, 8);
    LFS = alignTo(LFS+S, A);
    MFI.mapLocalFrameObject(i, -LFS);
  }

  MFI.setLocalFrameSize(LFS);
  unsigned A = MFI.getLocalFrameMaxAlign();
  assert(A <= 8 && "Unexpected local frame alignment");
  if (A == 0)
    MFI.setLocalFrameMaxAlign(8);
  MFI.setUseLocalStackAllocationBlock(true);

  // Set the physical aligned-stack base address register.
  unsigned AP = 0;
  if (const MachineInstr *AI = getAlignaInstr(MF))
    AP = AI->getOperand(0).getReg();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HMFI.setStackAlignBasePhysReg(AP);
}
1393
1394
/// Returns true if there are no caller-saved registers available in class RC.
1395
static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1396
37
      const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1397
37
  MachineRegisterInfo &MRI = MF.getRegInfo();
1398
37
1399
461
  auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool {
1400
506
    for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); 
++AI45
)
1401
487
      if (MRI.isPhysRegUsed(*AI))
1402
442
        return true;
1403
461
    
return false19
;
1404
461
  };
1405
37
1406
37
  // Check for an unused caller-saved register. Callee-saved registers
1407
37
  // have become pristine by now.
1408
479
  for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; 
++P442
)
1409
461
    if (!IsUsed(*P))
1410
19
      return false;
1411
37
1412
37
  // All caller-saved registers are used.
1413
37
  
return true18
;
1414
37
}
1415
1416
#ifndef NDEBUG
1417
// Debug helper: print the set bits of Regs as register names, e.g. "{ r16 r17 }".
static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
  dbgs() << '{';
  int Idx = Regs.find_first();
  while (Idx >= 0) {
    dbgs() << ' ' << printReg(unsigned(Idx), &TRI);
    Idx = Regs.find_next(Idx);
  }
  dbgs() << " }";
}
1425
#endif
1426
1427
// Rewrite the callee-saved register list CSI so that registers are saved at
// maximal granularity (whole super-registers where legal), and assign each
// surviving register a (fixed, if available) spill stack object.
// Returns true to tell the generic code that CSI has been fully processed.
bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
      const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
  LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
  MachineFrameInfo &MFI = MF.getFrameInfo();
  BitVector SRegs(Hexagon::NUM_TARGET_REGS);

  // Generate a set of unique, callee-saved registers (SRegs), where each
  // register in the set is maximal in terms of sub-/super-register relation,
  // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.

  // (1) For each callee-saved register, add that register and all of its
  // sub-registers to SRegs.
  LLVM_DEBUG(dbgs() << "Initial CS registers: {");
  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
    unsigned R = CSI[i].getReg();
    LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
    // Note: includeSelf=true, so R itself is added as well.
    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
      SRegs[*SR] = true;
  }
  LLVM_DEBUG(dbgs() << " }\n");
  LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
             dbgs() << "\n");

  // (2) For each reserved register, remove that register and all of its
  // sub- and super-registers from SRegs.
  BitVector Reserved = TRI->getReservedRegs(MF);
  for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
      SRegs[*SR] = false;
  }
  LLVM_DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI);
             dbgs() << "\n");
  LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);
             dbgs() << "\n");

  // (3) Collect all registers that have at least one sub-register in SRegs,
  // and also have no sub-registers that are reserved. These will be the can-
  // didates for saving as a whole instead of their individual sub-registers.
  // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
  BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
      TmpSup[*SR] = true;
  }
  for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
    unsigned R = x;
    // Drop candidate R if any of its sub-registers (or R itself) is reserved.
    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
      if (!Reserved[*SR])
        continue;
      TmpSup[R] = false;
      break;
    }
  }
  LLVM_DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI);
             dbgs() << "\n");

  // (4) Include all super-registers found in (3) into SRegs.
  SRegs |= TmpSup;
  LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);
             dbgs() << "\n");

  // (5) For each register R in SRegs, if any super-register of R is in SRegs,
  // remove R from SRegs.
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
      if (!SRegs[*SR])
        continue;
      SRegs[R] = false;
      break;
    }
  }
  LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);
             dbgs() << "\n");

  // Now, for each register that has a fixed stack slot, create the stack
  // object for it.
  CSI.clear();

  using SpillSlot = TargetFrameLowering::SpillSlot;

  unsigned NumFixed;
  int MinOffset = 0;  // CS offsets are negative.
  const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
  for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
    if (!SRegs[S->Reg])
      continue;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
    int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
    MinOffset = std::min(MinOffset, S->Offset);
    CSI.push_back(CalleeSavedInfo(S->Reg, FI));
    SRegs[S->Reg] = false;
  }

  // There can be some registers that don't have fixed slots. For example,
  // we need to store R0-R3 in functions with exception handling. For each
  // such register, create a non-fixed stack object.
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
    unsigned Size = TRI->getSpillSize(*RC);
    // Place the new slot below everything allocated so far, aligned to the
    // (stack-limited) spill alignment of the class.
    int Off = MinOffset - Size;
    unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
    assert(isPowerOf2_32(Align));
    Off &= -Align;
    int FI = MFI.CreateFixedSpillStackObject(Size, Off);
    MinOffset = std::min(MinOffset, Off);
    CSI.push_back(CalleeSavedInfo(R, FI));
    SRegs[R] = false;
  }

  LLVM_DEBUG({
    dbgs() << "CS information: {";
    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
      int FI = CSI[i].getFrameIdx();
      int Off = MFI.getObjectOffset(FI);
      dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
      if (Off >= 0)
        dbgs() << '+';
      dbgs() << Off;
    }
    dbgs() << " }\n";
  });

#ifndef NDEBUG
  // Verify that all registers were handled.
  bool MissedReg = false;
  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
    unsigned R = x;
    dbgs() << printReg(R, TRI) << ' ';
    MissedReg = true;
  }
  if (MissedReg)
    llvm_unreachable("...there are unhandled callee-saved registers!");
#endif

  return true;
}
1567
1568
bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1569
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1570
2.04k
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1571
2.04k
  MachineInstr *MI = &*It;
1572
2.04k
  DebugLoc DL = MI->getDebugLoc();
1573
2.04k
  unsigned DstR = MI->getOperand(0).getReg();
1574
2.04k
  unsigned SrcR = MI->getOperand(1).getReg();
1575
2.04k
  if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1576
2.04k
      
!Hexagon::ModRegsRegClass.contains(SrcR)110
)
1577
2.04k
    return false;
1578
0
1579
0
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1580
0
  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
1581
0
  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1582
0
    .addReg(TmpR, RegState::Kill);
1583
0
1584
0
  NewRegs.push_back(TmpR);
1585
0
  B.erase(It);
1586
0
  return true;
1587
0
}
1588
1589
bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1590
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1591
26
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1592
26
  MachineInstr *MI = &*It;
1593
26
  if (!MI->getOperand(0).isFI())
1594
0
    return false;
1595
26
1596
26
  DebugLoc DL = MI->getDebugLoc();
1597
26
  unsigned Opc = MI->getOpcode();
1598
26
  unsigned SrcR = MI->getOperand(2).getReg();
1599
26
  bool IsKill = MI->getOperand(2).isKill();
1600
26
  int FI = MI->getOperand(0).getIndex();
1601
26
1602
26
  // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1603
26
  // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1604
26
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1605
26
  unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1606
26
                                                 : 
Hexagon::A2_tfrcrr0
;
1607
26
  BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1608
26
    .addReg(SrcR, getKillRegState(IsKill));
1609
26
1610
26
  // S2_storeri_io FI, 0, TmpR
1611
26
  BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1612
26
      .addFrameIndex(FI)
1613
26
      .addImm(0)
1614
26
      .addReg(TmpR, RegState::Kill)
1615
26
      .cloneMemRefs(*MI);
1616
26
1617
26
  NewRegs.push_back(TmpR);
1618
26
  B.erase(It);
1619
26
  return true;
1620
26
}
1621
1622
bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1623
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1624
26
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1625
26
  MachineInstr *MI = &*It;
1626
26
  if (!MI->getOperand(1).isFI())
1627
0
    return false;
1628
26
1629
26
  DebugLoc DL = MI->getDebugLoc();
1630
26
  unsigned Opc = MI->getOpcode();
1631
26
  unsigned DstR = MI->getOperand(0).getReg();
1632
26
  int FI = MI->getOperand(1).getIndex();
1633
26
1634
26
  // TmpR = L2_loadri_io FI, 0
1635
26
  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1636
26
  BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1637
26
      .addFrameIndex(FI)
1638
26
      .addImm(0)
1639
26
      .cloneMemRefs(*MI);
1640
26
1641
26
  // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1642
26
  // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1643
26
  unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1644
26
                                                 : 
Hexagon::A2_tfrrcr0
;
1645
26
  BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1646
26
    .addReg(TmpR, RegState::Kill);
1647
26
1648
26
  NewRegs.push_back(TmpR);
1649
26
  B.erase(It);
1650
26
  return true;
1651
26
}
1652
1653
// Expand a pseudo store of an HVX predicate register (PS_vstorerq_ai):
// materialize the predicate into a full vector register via V6_vandqrt,
// then store that vector to the frame slot. Returns false if operand 0 is
// not a frame index. New virtual registers are reported through NewRegs.
bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
  MachineInstr *MI = &*It;
  if (!MI->getOperand(0).isFI())
    return false;

  DebugLoc DL = MI->getDebugLoc();
  unsigned SrcR = MI->getOperand(2).getReg();
  bool IsKill = MI->getOperand(2).isKill();
  int FI = MI->getOperand(0).getIndex();
  auto *RC = &Hexagon::HvxVRRegClass;

  // Insert transfer to general vector register.
  //   TmpR0 = A2_tfrsi 0x01010101
  //   TmpR1 = V6_vandqrt Qx, TmpR0
  //   store FI, 0, TmpR1
  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  unsigned TmpR1 = MRI.createVirtualRegister(RC);

  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
    .addImm(0x01010101);

  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
    .addReg(SrcR, getKillRegState(IsKill))
    .addReg(TmpR0, RegState::Kill);

  auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI);
  // storeRegToStackSlot inserted the store immediately before It, so
  // std::prev(It) is that store; expand it into a real vector store.
  expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);

  NewRegs.push_back(TmpR0);
  NewRegs.push_back(TmpR1);
  B.erase(It);
  return true;
}
1689
1690
// Expand a pseudo load of an HVX predicate register (PS_vloadrq_ai):
// load a full vector from the frame slot, then convert it back into a
// predicate register via V6_vandvrt. Returns false if operand 1 is not a
// frame index. New virtual registers are reported through NewRegs.
bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
  MachineInstr *MI = &*It;
  if (!MI->getOperand(1).isFI())
    return false;

  DebugLoc DL = MI->getDebugLoc();
  unsigned DstR = MI->getOperand(0).getReg();
  int FI = MI->getOperand(1).getIndex();
  auto *RC = &Hexagon::HvxVRRegClass;

  // TmpR0 = A2_tfrsi 0x01010101
  // TmpR1 = load FI, 0
  // DstR = V6_vandvrt TmpR1, TmpR0
  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  unsigned TmpR1 = MRI.createVirtualRegister(RC);

  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
    .addImm(0x01010101);
  MachineFunction &MF = *B.getParent();
  auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI);
  // loadRegFromStackSlot inserted the load immediately before It, so
  // std::prev(It) is that load; expand it into a real vector load.
  expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);

  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
    .addReg(TmpR1, RegState::Kill)
    .addReg(TmpR0, RegState::Kill);

  NewRegs.push_back(TmpR0);
  NewRegs.push_back(TmpR1);
  B.erase(It);
  return true;
}
1724
1725
// Expand a pseudo store of an HVX vector pair (PS_vstorerw_ai/_vstorerwu_ai)
// into separate stores of the low and high single vectors, skipping halves
// that are not live (i.e. undefined) at the store. Returns false if operand
// 0 is not a frame index.
bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
  MachineFunction &MF = *B.getParent();
  auto &MFI = MF.getFrameInfo();
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
  MachineInstr *MI = &*It;
  if (!MI->getOperand(0).isFI())
    return false;

  // It is possible that the double vector being stored is only partially
  // defined. From the point of view of the liveness tracking, it is ok to
  // store it as a whole, but if we break it up we may end up storing a
  // register that is entirely undefined.
  // Step the live set forward from the block entry to the store to learn
  // which halves of the pair are actually live here.
  LivePhysRegs LPR(HRI);
  LPR.addLiveIns(B);
  SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
  for (auto R = B.begin(); R != It; ++R) {
    Clobbers.clear();
    LPR.stepForward(*R, Clobbers);
  }

  DebugLoc DL = MI->getDebugLoc();
  unsigned SrcR = MI->getOperand(2).getReg();
  unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
  unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
  bool IsKill = MI->getOperand(2).isKill();
  int FI = MI->getOperand(0).getIndex();

  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
  unsigned HasAlign = MFI.getObjectAlignment(FI);
  unsigned StoreOpc;

  // Store low part.
  // Use the aligned store only if the slot's alignment suffices.
  if (LPR.contains(SrcLo)) {
    StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
                                     : Hexagon::V6_vS32Ub_ai;
    BuildMI(B, It, DL, HII.get(StoreOpc))
        .addFrameIndex(FI)
        .addImm(0)
        .addReg(SrcLo, getKillRegState(IsKill))
        .cloneMemRefs(*MI);
  }

  // Store high part.
  // The high half lives at offset Size, so its effective alignment is
  // MinAlign(HasAlign, Size).
  if (LPR.contains(SrcHi)) {
    StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
                                                     : Hexagon::V6_vS32Ub_ai;
    BuildMI(B, It, DL, HII.get(StoreOpc))
        .addFrameIndex(FI)
        .addImm(Size)
        .addReg(SrcHi, getKillRegState(IsKill))
        .cloneMemRefs(*MI);
  }

  B.erase(It);
  return true;
}
1784
1785
// Expand a pseudo load of an HVX vector pair (PS_vloadrw_ai/_vloadrwu_ai)
// into separate loads of the low and high single vectors. Returns false if
// operand 1 is not a frame index.
bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
  MachineFunction &MF = *B.getParent();
  auto &MFI = MF.getFrameInfo();
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
  MachineInstr *MI = &*It;
  if (!MI->getOperand(1).isFI())
    return false;

  DebugLoc DL = MI->getDebugLoc();
  unsigned DstR = MI->getOperand(0).getReg();
  unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
  unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
  int FI = MI->getOperand(1).getIndex();

  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
  unsigned HasAlign = MFI.getObjectAlignment(FI);
  unsigned LoadOpc;

  // Load low part.
  // Use the aligned load only if the slot's alignment suffices.
  LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
                                  : Hexagon::V6_vL32Ub_ai;
  BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
      .addFrameIndex(FI)
      .addImm(0)
      .cloneMemRefs(*MI);

  // Load high part.
  // The high half lives at offset Size, so its effective alignment is
  // MinAlign(HasAlign, Size).
  LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
                                                  : Hexagon::V6_vL32Ub_ai;
  BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
      .addFrameIndex(FI)
      .addImm(Size)
      .cloneMemRefs(*MI);

  B.erase(It);
  return true;
}
1825
1826
bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1827
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1828
7
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1829
7
  MachineFunction &MF = *B.getParent();
1830
7
  auto &MFI = MF.getFrameInfo();
1831
7
  MachineInstr *MI = &*It;
1832
7
  if (!MI->getOperand(0).isFI())
1833
0
    return false;
1834
7
1835
7
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1836
7
  DebugLoc DL = MI->getDebugLoc();
1837
7
  unsigned SrcR = MI->getOperand(2).getReg();
1838
7
  bool IsKill = MI->getOperand(2).isKill();
1839
7
  int FI = MI->getOperand(0).getIndex();
1840
7
1841
7
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1842
7
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1843
7
  unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1844
7
                                            : 
Hexagon::V6_vS32Ub_ai0
;
1845
7
  BuildMI(B, It, DL, HII.get(StoreOpc))
1846
7
      .addFrameIndex(FI)
1847
7
      .addImm(0)
1848
7
      .addReg(SrcR, getKillRegState(IsKill))
1849
7
      .cloneMemRefs(*MI);
1850
7
1851
7
  B.erase(It);
1852
7
  return true;
1853
7
}
1854
1855
bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
1856
      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1857
8
      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1858
8
  MachineFunction &MF = *B.getParent();
1859
8
  auto &MFI = MF.getFrameInfo();
1860
8
  MachineInstr *MI = &*It;
1861
8
  if (!MI->getOperand(1).isFI())
1862
0
    return false;
1863
8
1864
8
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1865
8
  DebugLoc DL = MI->getDebugLoc();
1866
8
  unsigned DstR = MI->getOperand(0).getReg();
1867
8
  int FI = MI->getOperand(1).getIndex();
1868
8
1869
8
  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1870
8
  unsigned HasAlign = MFI.getObjectAlignment(FI);
1871
8
  unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1872
8
                                           : 
Hexagon::V6_vL32Ub_ai0
;
1873
8
  BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
1874
8
      .addFrameIndex(FI)
1875
8
      .addImm(0)
1876
8
      .cloneMemRefs(*MI);
1877
8
1878
8
  B.erase(It);
1879
8
  return true;
1880
8
}
1881
1882
// Walk every instruction in the function and expand spill/copy pseudos
// (predicate/modifier spills, HVX predicate and vector-pair spills) into
// real instructions. Virtual registers created during expansion are
// collected in NewRegs. Returns true if anything was changed.
bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
      SmallVectorImpl<unsigned> &NewRegs) const {
  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed = false;

  for (auto &B : MF) {
    // Traverse the basic block.
    // NextI is captured before any expansion, since the expand* helpers
    // may erase the current instruction.
    MachineBasicBlock::iterator NextI;
    for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
      MachineInstr *MI = &*I;
      NextI = std::next(I);
      unsigned Opc = MI->getOpcode();

      switch (Opc) {
        case TargetOpcode::COPY:
          Changed |= expandCopy(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::STriw_pred:
        case Hexagon::STriw_ctr:
          Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::LDriw_pred:
        case Hexagon::LDriw_ctr:
          Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::PS_vstorerq_ai:
          Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::PS_vloadrq_ai:
          Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::PS_vloadrw_ai:
        case Hexagon::PS_vloadrwu_ai:
          Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
          break;
        case Hexagon::PS_vstorerw_ai:
        case Hexagon::PS_vstorerwu_ai:
          Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
          break;
      }
    }
  }

  return Changed;
}
1928
1929
// Compute the set of callee-saved registers to spill, expand spill pseudos,
// optionally optimize spill slots, and reserve scavenging spill slots when
// register scavenging may be needed.
void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();

  SavedRegs.resize(HRI.getNumRegs());

  // If we have a function containing __builtin_eh_return we want to spill and
  // restore all callee saved registers. Pretend that they are used.
  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
    for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
      SavedRegs.set(*R);

  // Replace predicate register pseudo spill code.
  SmallVector<unsigned,8> NewRegs;
  expandSpillMacros(MF, NewRegs);
  if (OptimizeSpillSlots && !isOptNone(MF))
    optimizeSpillSlots(MF, NewRegs);

  // We need to reserve a spill slot if scavenging could potentially require
  // spilling a scavenged register.
  if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &MRI = MF.getRegInfo();
    SetVector<const TargetRegisterClass*> SpillRCs;
    // Reserve an int register in any case, because it could be used to hold
    // the stack offset in case it does not fit into a spill instruction.
    SpillRCs.insert(&Hexagon::IntRegsRegClass);

    // Also cover the register class of every virtual register created by
    // the spill-macro expansion above.
    for (unsigned VR : NewRegs)
      SpillRCs.insert(MRI.getRegClass(VR));

    for (auto *RC : SpillRCs) {
      if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
        continue;
      // Integer registers get NumberScavengerSlots slots; any other class
      // gets one.
      unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1;
      unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
      for (unsigned i = 0; i < Num; i++) {
        int NewFI = MFI.CreateSpillStackObject(S, A);
        RS->addScavengingFrameIndex(NewFI);
      }
    }
  }

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
}
1975
1976
unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
1977
      HexagonBlockRanges::IndexRange &FIR,
1978
      HexagonBlockRanges::InstrIndexMap &IndexMap,
1979
      HexagonBlockRanges::RegToRangeMap &DeadMap,
1980
74
      const TargetRegisterClass *RC) const {
1981
74
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1982
74
  auto &MRI = MF.getRegInfo();
1983
74
1984
1.25k
  auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
1985
1.25k
    auto F = DeadMap.find({Reg,0});
1986
1.25k
    if (F == DeadMap.end())
1987
150
      return false;
1988
1.10k
    for (auto &DR : F->second)
1989
23.4k
      if (DR.contains(FIR))
1990
48
        return true;
1991
1.10k
    
return false1.05k
;
1992
1.10k
  };
1993
74
1994
1.25k
  for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
1995
1.25k
    bool Dead = true;
1996
1.25k
    for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
1997
1.25k
      if (isDead(R.Reg))
1998
48
        continue;
1999
1.20k
      Dead = false;
2000
1.20k
      break;
2001
1.20k
    }
2002
1.25k
    if (Dead)
2003
44
      return Reg;
2004
1.25k
  }
2005
74
  
return 030
;
2006
74
}
2007
2008
void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
2009
3.36k
      SmallVectorImpl<unsigned> &VRegs) const {
2010
3.36k
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2011
3.36k
  auto &HII = *HST.getInstrInfo();
2012
3.36k
  auto &HRI = *HST.getRegisterInfo();
2013
3.36k
  auto &MRI = MF.getRegInfo();
2014
3.36k
  HexagonBlockRanges HBR(MF);
2015
3.36k
2016
3.36k
  using BlockIndexMap =
2017
3.36k
      std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
2018
3.36k
  using BlockRangeMap =
2019
3.36k
      std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
2020
3.36k
  using IndexType = HexagonBlockRanges::IndexType;
2021
3.36k
2022
3.36k
  struct SlotInfo {
2023
3.36k
    BlockRangeMap Map;
2024
3.36k
    unsigned Size = 0;
2025
3.36k
    const TargetRegisterClass *RC = nullptr;
2026
3.36k
2027
3.36k
    SlotInfo() = defaul
t458
;
2028
3.36k
  };
2029
3.36k
2030
3.36k
  BlockIndexMap BlockIndexes;
2031
3.36k
  SmallSet<int,4> BadFIs;
2032
3.36k
  std::map<int,SlotInfo> FIRangeMap;
2033
3.36k
2034
3.36k
  // Accumulate register classes: get a common class for a pre-existing
2035
3.36k
  // class HaveRC and a new class NewRC. Return nullptr if a common class
2036
3.36k
  // cannot be found, otherwise return the resulting class. If HaveRC is
2037
3.36k
  // nullptr, assume that it is still unset.
2038
3.36k
  auto getCommonRC =
2039
3.36k
      [](const TargetRegisterClass *HaveRC,
2040
3.36k
         const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
2041
857
    if (HaveRC == nullptr || 
HaveRC == NewRC399
)
2042
856
      return NewRC;
2043
1
    // Different classes, both non-null. Pick the more general one.
2044
1
    if (HaveRC->hasSubClassEq(NewRC))
2045
0
      return HaveRC;
2046
1
    if (NewRC->hasSubClassEq(HaveRC))
2047
0
      return NewRC;
2048
1
    return nullptr;
2049
1
  };
2050
3.36k
2051
3.36k
  // Scan all blocks in the function. Check all occurrences of frame indexes,
2052
3.36k
  // and collect relevant information.
2053
5.57k
  for (auto &B : MF) {
2054
5.57k
    std::map<int,IndexType> LastStore, LastLoad;
2055
5.57k
    // Emplace appears not to be supported in gcc 4.7.2-4.
2056
5.57k
    //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2057
5.57k
    auto P = BlockIndexes.insert(
2058
5.57k
                std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
2059
5.57k
    auto &IndexMap = P.first->second;
2060
5.57k
    LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
2061
5.57k
                      << IndexMap << '\n');
2062
5.57k
2063
28.4k
    for (auto &In : B) {
2064
28.4k
      int LFI, SFI;
2065
28.4k
      bool Load = HII.isLoadFromStackSlot(In, LFI) && 
!HII.isPredicated(In)420
;
2066
28.4k
      bool Store = HII.isStoreToStackSlot(In, SFI) && 
!HII.isPredicated(In)437
;
2067
28.4k
      if (Load && 
Store420
) {
2068
0
        // If it's both a load and a store, then we won't handle it.
2069
0
        BadFIs.insert(LFI);
2070
0
        BadFIs.insert(SFI);
2071
0
        continue;
2072
0
      }
2073
28.4k
      // Check for register classes of the register used as the source for
2074
28.4k
      // the store, and the register used as the destination for the load.
2075
28.4k
      // Also, only accept base+imm_offset addressing modes. Other addressing
2076
28.4k
      // modes can have side-effects (post-increments, etc.). For stack
2077
28.4k
      // slots they are very unlikely, so there is not much loss due to
2078
28.4k
      // this restriction.
2079
28.4k
      if (Load || 
Store27.9k
) {
2080
857
        int TFI = Load ? 
LFI420
:
SFI437
;
2081
857
        unsigned AM = HII.getAddrMode(In);
2082
857
        SlotInfo &SI = FIRangeMap[TFI];
2083
857
        bool Bad = (AM != HexagonII::BaseImmOffset);
2084
857
        if (!Bad) {
2085
857
          // If the addressing mode is ok, check the register class.
2086
857
          unsigned OpNum = Load ? 
0420
:
2437
;
2087
857
          auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
2088
857
          RC = getCommonRC(SI.RC, RC);
2089
857
          if (RC == nullptr)
2090
1
            Bad = true;
2091
856
          else
2092
856
            SI.RC = RC;
2093
857
        }
2094
857
        if (!Bad) {
2095
856
          // Check sizes.
2096
856
          unsigned S = HII.getMemAccessSize(In);
2097
856
          if (SI.Size != 0 && 
SI.Size != S398
)
2098
0
            Bad = true;
2099
856
          else
2100
856
            SI.Size = S;
2101
856
        }
2102
857
        if (!Bad) {
2103
856
          for (auto *Mo : In.memoperands()) {
2104
849
            if (!Mo->isVolatile() && 
!Mo->isAtomic()803
)
2105
803
              continue;
2106
46
            Bad = true;
2107
46
            break;
2108
46
          }
2109
856
        }
2110
857
        if (Bad)
2111
47
          BadFIs.insert(TFI);
2112
857
      }
2113
28.4k
2114
28.4k
      // Locate uses of frame indices.
2115
114k
      for (unsigned i = 0, n = In.getNumOperands(); i < n; 
++i86.2k
) {
2116
86.2k
        const MachineOperand &Op = In.getOperand(i);
2117
86.2k
        if (!Op.isFI())
2118
85.0k
          continue;
2119
1.21k
        int FI = Op.getIndex();
2120
1.21k
        // Make sure that the following operand is an immediate and that
2121
1.21k
        // it is 0. This is the offset in the stack object.
2122
1.21k
        if (i+1 >= n || !In.getOperand(i+1).isImm() ||
2123
1.21k
            In.getOperand(i+1).getImm() != 0)
2124
76
          BadFIs.insert(FI);
2125
1.21k
        if (BadFIs.count(FI))
2126
272
          continue;
2127
946
2128
946
        IndexType Index = IndexMap.getIndex(&In);
2129
946
        if (Load) {
2130
373
          if (LastStore[FI] == IndexType::None)
2131
140
            LastStore[FI] = IndexType::Entry;
2132
373
          LastLoad[FI] = Index;
2133
573
        } else if (Store) {
2134
386
          HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2135
386
          if (LastStore[FI] != IndexType::None)
2136
18
            RL.add(LastStore[FI], LastLoad[FI], false, false);
2137
368
          else if (LastLoad[FI] != IndexType::None)
2138
0
            RL.add(IndexType::Entry, LastLoad[FI], false, false);
2139
386
          LastLoad[FI] = IndexType::None;
2140
386
          LastStore[FI] = Index;
2141
386
        } else {
2142
187
          BadFIs.insert(FI);
2143
187
        }
2144
946
      }
2145
28.4k
    }
2146
5.57k
2147
5.57k
    for (auto &I : LastLoad) {
2148
508
      IndexType LL = I.second;
2149
508
      if (LL == IndexType::None)
2150
296
        continue;
2151
212
      auto &RL = FIRangeMap[I.first].Map[&B];
2152
212
      IndexType &LS = LastStore[I.first];
2153
212
      if (LS != IndexType::None)
2154
212
        RL.add(LS, LL, false, false);
2155
0
      else
2156
0
        RL.add(IndexType::Entry, LL, false, false);
2157
212
      LS = IndexType::None;
2158
212
    }
2159
5.57k
    for (auto &I : LastStore) {
2160
508
      IndexType LS = I.second;
2161
508
      if (LS == IndexType::None)
2162
212
        continue;
2163
296
      auto &RL = FIRangeMap[I.first].Map[&B];
2164
296
      RL.add(LS, IndexType::None, false, false);
2165
296
    }
2166
5.57k
  }
2167
3.36k
2168
3.36k
  LLVM_DEBUG({
2169
3.36k
    for (auto &P : FIRangeMap) {
2170
3.36k
      dbgs() << "fi#" << P.first;
2171
3.36k
      if (BadFIs.count(P.first))
2172
3.36k
        dbgs() << " (bad)";
2173
3.36k
      dbgs() << "  RC: ";
2174
3.36k
      if (P.second.RC != nullptr)
2175
3.36k
        dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2176
3.36k
      else
2177
3.36k
        dbgs() << "<null>\n";
2178
3.36k
      for (auto &R : P.second.Map)
2179
3.36k
        dbgs() << "  " << printMBBReference(*R.first) << " { " << R.second
2180
3.36k
               << "}\n";
2181
3.36k
    }
2182
3.36k
  });
2183
3.36k
2184
3.36k
  // When a slot is loaded from in a block without being stored to in the
2185
3.36k
  // same block, it is live-on-entry to this block. To avoid CFG analysis,
2186
3.36k
  // consider this slot to be live-on-exit from all blocks.
2187
3.36k
  SmallSet<int,4> LoxFIs;
2188
3.36k
2189
3.36k
  std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2190
3.36k
2191
3.36k
  for (auto &P : FIRangeMap) {
2192
458
    // P = pair(FI, map: BB->RangeList)
2193
458
    if (BadFIs.count(P.first))
2194
90
      continue;
2195
3.69k
    
for (auto &B : MF)368
{
2196
3.69k
      auto F = P.second.Map.find(&B);
2197
3.69k
      // F = pair(BB, RangeList)
2198
3.69k
      if (F == P.second.Map.end() || 
F->second.empty()484
)
2199
3.20k
        continue;
2200
484
      HexagonBlockRanges::IndexRange &IR = F->second.front();
2201
484
      if (IR.start() == IndexType::Entry)
2202
135
        LoxFIs.insert(P.first);
2203
484
      BlockFIMap[&B].push_back(P.first);
2204
484
    }
2205
368
  }
2206
3.36k
2207
3.36k
  LLVM_DEBUG({
2208
3.36k
    dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2209
3.36k
    for (auto &P : BlockFIMap) {
2210
3.36k
      auto &FIs = P.second;
2211
3.36k
      if (FIs.empty())
2212
3.36k
        continue;
2213
3.36k
      dbgs() << "  " << printMBBReference(*P.first) << ": {";
2214
3.36k
      for (auto I : FIs) {
2215
3.36k
        dbgs() << " fi#" << I;
2216
3.36k
        if (LoxFIs.count(I))
2217
3.36k
          dbgs() << '*';
2218
3.36k
      }
2219
3.36k
      dbgs() << " }\n";
2220
3.36k
    }
2221
3.36k
  });
2222
3.36k
2223
#ifndef NDEBUG
2224
  bool HasOptLimit = SpillOptMax.getPosition();
2225
#endif
2226
2227
3.36k
  // eliminate loads, when all loads eliminated, eliminate all stores.
2228
5.57k
  for (auto &B : MF) {
2229
5.57k
    auto F = BlockIndexes.find(&B);
2230
5.57k
    assert(F != BlockIndexes.end());
2231
5.57k
    HexagonBlockRanges::InstrIndexMap &IM = F->second;
2232
5.57k
    HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2233
5.57k
    HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2234
5.57k
    LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
2235
5.57k
                      << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2236
5.57k
2237
5.57k
    for (auto FI : BlockFIMap[&B]) {
2238
484
      if (BadFIs.count(FI))
2239
0
        continue;
2240
484
      LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2241
484
      HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2242
502
      for (auto &Range : RL) {
2243
502
        LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2244
502
        if (!IndexType::isInstr(Range.start()) ||
2245
502
            
!IndexType::isInstr(Range.end())367
)
2246
428
          continue;
2247
74
        MachineInstr &SI = *IM.getInstr(Range.start());
2248
74
        MachineInstr &EI = *IM.getInstr(Range.end());
2249
74
        assert(SI.mayStore() && "Unexpected start instruction");
2250
74
        assert(EI.mayLoad() && "Unexpected end instruction");
2251
74
        MachineOperand &SrcOp = SI.getOperand(2);
2252
74
2253
74
        HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2254
74
                                                  SrcOp.getSubReg() };
2255
74
        auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
2256
74
        // The this-> is needed to unconfuse MSVC.
2257
74
        unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2258
74
        LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
2259
74
                          << '\n');
2260
74
        if (FoundR == 0)
2261
30
          continue;
2262
#ifndef NDEBUG
2263
        if (HasOptLimit) {
2264
          if (SpillOptCount >= SpillOptMax)
2265
            return;
2266
          SpillOptCount++;
2267
        }
2268
#endif
2269
2270
44
        // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2271
44
        MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2272
44
        MachineInstr *CopyIn = nullptr;
2273
44
        if (SrcRR.Reg != FoundR || 
SrcRR.Sub != 00
) {
2274
44
          const DebugLoc &DL = SI.getDebugLoc();
2275
44
          CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2276
44
                       .add(SrcOp);
2277
44
        }
2278
44
2279
44
        ++StartIt;
2280
44
        // Check if this is a last store and the FI is live-on-exit.
2281
44
        if (LoxFIs.count(FI) && 
(&Range == &RL.back())4
) {
2282
4
          // Update store's source register.
2283
4
          if (unsigned SR = SrcOp.getSubReg())
2284
0
            SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2285
4
          else
2286
4
            SrcOp.setReg(FoundR);
2287
4
          SrcOp.setSubReg(0);
2288
4
          // We are keeping this register live.
2289
4
          SrcOp.setIsKill(false);
2290
40
        } else {
2291
40
          B.erase(&SI);
2292
40
          IM.replaceInstr(&SI, CopyIn);
2293
40
        }
2294
44
2295
44
        auto EndIt = std::next(EI.getIterator());
2296
3.03k
        for (auto It = StartIt; It != EndIt; 
It = NextIt2.99k
) {
2297
2.99k
          MachineInstr &MI = *It;
2298
2.99k
          NextIt = std::next(It);
2299
2.99k
          int TFI;
2300
2.99k
          if (!HII.isLoadFromStackSlot(MI, TFI) || 
TFI != FI779
)
2301
2.92k
            continue;
2302
69
          unsigned DstR = MI.getOperand(0).getReg();
2303
69
          assert(MI.getOperand(0).getSubReg() == 0);
2304
69
          MachineInstr *CopyOut = nullptr;
2305
69
          if (DstR != FoundR) {
2306
69
            DebugLoc DL = MI.getDebugLoc();
2307
69
            unsigned MemSize = HII.getMemAccessSize(MI);
2308
69
            assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2309
69
            unsigned CopyOpc = TargetOpcode::COPY;
2310
69
            if (HII.isSignExtendingLoad(MI))
2311
0
              CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2312
69
            else if (HII.isZeroExtendingLoad(MI))
2313
0
              CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2314
69
            CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2315
69
                        .addReg(FoundR, getKillRegState(&MI == &EI));
2316
69
          }
2317
69
          IM.replaceInstr(&MI, CopyOut);
2318
69
          B.erase(It);
2319
69
        }
2320
44
2321
44
        // Update the dead map.
2322
44
        HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2323
44
        for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2324
48
          DM[RR].subtract(Range);
2325
44
      } // for Range in range list
2326
484
    }
2327
5.57k
  }
2328
3.36k
}
2329
2330
void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2331
6
      const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
2332
6
  MachineBasicBlock &MB = *AI->getParent();
2333
6
  DebugLoc DL = AI->getDebugLoc();
2334
6
  unsigned A = AI->getOperand(2).getImm();
2335
6
2336
6
  // Have
2337
6
  //    Rd  = alloca Rs, #A
2338
6
  //
2339
6
  // If Rs and Rd are different registers, use this sequence:
2340
6
  //    Rd  = sub(r29, Rs)
2341
6
  //    r29 = sub(r29, Rs)
2342
6
  //    Rd  = and(Rd, #-A)    ; if necessary
2343
6
  //    r29 = and(r29, #-A)   ; if necessary
2344
6
  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2345
6
  // otherwise, do
2346
6
  //    Rd  = sub(r29, Rs)
2347
6
  //    Rd  = and(Rd, #-A)    ; if necessary
2348
6
  //    r29 = Rd
2349
6
  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2350
6
2351
6
  MachineOperand &RdOp = AI->getOperand(0);
2352
6
  MachineOperand &RsOp = AI->getOperand(1);
2353
6
  unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
2354
6
2355
6
  // Rd = sub(r29, Rs)
2356
6
  BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2357
6
      .addReg(SP)
2358
6
      .addReg(Rs);
2359
6
  if (Rs != Rd) {
2360
2
    // r29 = sub(r29, Rs)
2361
2
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2362
2
        .addReg(SP)
2363
2
        .addReg(Rs);
2364
2
  }
2365
6
  if (A > 8) {
2366
1
    // Rd  = and(Rd, #-A)
2367
1
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2368
1
        .addReg(Rd)
2369
1
        .addImm(-int64_t(A));
2370
1
    if (Rs != Rd)
2371
1
      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2372
1
          .addReg(SP)
2373
1
          .addImm(-int64_t(A));
2374
1
  }
2375
6
  if (Rs == Rd) {
2376
4
    // r29 = Rd
2377
4
    BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2378
4
        .addReg(Rd);
2379
4
  }
2380
6
  if (CF > 0) {
2381
1
    // Rd = add(Rd, #CF)
2382
1
    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2383
1
        .addReg(Rd)
2384
1
        .addImm(CF);
2385
1
  }
2386
6
}
2387
2388
5.89k
bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2389
5.89k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
2390
5.89k
  if (!MFI.hasVarSizedObjects())
2391
5.88k
    return false;
2392
6
  unsigned MaxA = MFI.getMaxAlignment();
2393
6
  if (MaxA <= getStackAlignment())
2394
4
    return false;
2395
2
  return true;
2396
2
}
2397
2398
const MachineInstr *HexagonFrameLowering::getAlignaInstr(
      const MachineFunction &MF) const {
  // Linear scan for the stack-realignment pseudo; returns the first one
  // found, or null if the function contains none.
  for (const MachineBasicBlock &MBB : MF)
    for (const MachineInstr &MI : MBB)
      if (MI.getOpcode() == Hexagon::PS_aligna)
        return &MI;
  return nullptr;
}
2406
2407
/// Adds all callee-saved registers as implicit uses or defs to the
2408
/// instruction.
2409
void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2410
25
      const CSIVect &CSI, bool IsDef, bool IsKill) const {
2411
25
  // Add the callee-saved registers as implicit uses.
2412
25
  for (auto &R : CSI)
2413
81
    MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2414
25
}
2415
2416
/// Determine whether the callee-saved register saves and restores should
2417
/// be generated via inline code. If this function returns "true", inline
2418
/// code will be generated. If this function returns "false", additional
2419
/// checks are performed, which may still lead to the inline code.
2420
bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2421
260
      const CSIVect &CSI) const {
2422
260
  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2423
3
    return true;
2424
257
  if (!hasFP(MF))
2425
30
    return true;
2426
227
  if (!isOptSize(MF) && 
!isMinSize(MF)205
)
2427
195
    if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
2428
7
      return true;
2429
220
2430
220
  // Check if CSI only has double registers, and if the registers form
2431
220
  // a contiguous block starting from D8.
2432
220
  BitVector Regs(Hexagon::NUM_TARGET_REGS);
2433
629
  for (unsigned i = 0, n = CSI.size(); i < n; 
++i409
) {
2434
409
    unsigned R = CSI[i].getReg();
2435
409
    if (!Hexagon::DoubleRegsRegClass.contains(R))
2436
0
      return true;
2437
409
    Regs[R] = true;
2438
409
  }
2439
220
  int F = Regs.find_first();
2440
220
  if (F != Hexagon::D8)
2441
0
    return true;
2442
629
  
while (220
F >= 0) {
2443
409
    int N = Regs.find_next(F);
2444
409
    if (N >= 0 && 
N != F+1189
)
2445
0
      return true;
2446
409
    F = N;
2447
409
  }
2448
220
2449
220
  return false;
2450
220
}
2451
2452
bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2453
131
      const CSIVect &CSI) const {
2454
131
  if (shouldInlineCSR(MF, CSI))
2455
20
    return false;
2456
111
  unsigned NumCSI = CSI.size();
2457
111
  if (NumCSI <= 1)
2458
69
    return false;
2459
42
2460
42
  unsigned Threshold = isOptSize(MF) ? 
SpillFuncThresholdOs6
2461
42
                                     : 
SpillFuncThreshold36
;
2462
42
  return Threshold < NumCSI;
2463
42
}
2464
2465
bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
2466
129
      const CSIVect &CSI) const {
2467
129
  if (shouldInlineCSR(MF, CSI))
2468
20
    return false;
2469
109
  // The restore functions do a bit more than just restoring registers.
2470
109
  // The non-returning versions will go back directly to the caller's
2471
109
  // caller, others will clean up the stack frame in preparation for
2472
109
  // a tail call. Using them can still save code size even if only one
2473
109
  // register is getting restores. Make the decision based on -Oz:
2474
109
  // using -Os will use inline restore for a single register.
2475
109
  if (isMinSize(MF))
2476
5
    return true;
2477
104
  unsigned NumCSI = CSI.size();
2478
104
  if (NumCSI <= 1)
2479
63
    return false;
2480
41
2481
41
  unsigned Threshold = isOptSize(MF) ? 
SpillFuncThresholdOs-16
2482
41
                                     : 
SpillFuncThreshold35
;
2483
41
  return Threshold < NumCSI;
2484
41
}
2485
2486
4.96k
bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
  unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  // A fairly simplistic guess as to whether a potential load/store to a
  // stack location could require an extra register: with HVX, any frame
  // estimated above 256 bytes is considered at risk.
  if (HST.useHVXOps() && StackSize > 256)
    return true;

  // Check if the function has store-immediate instructions that access
  // the stack. Since the offset field is not extendable, if the stack
  // size exceeds the offset limit (6 bits, shifted by the access size),
  // the stores will require a new base register.
  bool HasImmStack = false;
  unsigned MinLS = ~0u;   // Log_2 of the smallest memory access size seen.

  for (const MachineBasicBlock &B : MF) {
    for (const MachineInstr &MI : B) {
      unsigned LS;
      switch (MI.getOpcode()) {
        // Word stores: offset scaled by 4.
        case Hexagon::S4_storeirit_io:
        case Hexagon::S4_storeirif_io:
        case Hexagon::S4_storeiri_io:
          LS = 2;
          break;
        // Halfword stores: offset scaled by 2.
        case Hexagon::S4_storeirht_io:
        case Hexagon::S4_storeirhf_io:
        case Hexagon::S4_storeirh_io:
          LS = 1;
          break;
        // Byte stores: offset unscaled.
        case Hexagon::S4_storeirbt_io:
        case Hexagon::S4_storeirbf_io:
        case Hexagon::S4_storeirb_io:
          LS = 0;
          break;
        default:
          continue;   // Not a store-immediate.
      }
      if (MI.getOperand(0).isFI())
        HasImmStack = true;
      MinLS = std::min(MinLS, LS);
    }
  }

  if (HasImmStack)
    return !isUInt<6>(StackSize >> MinLS);

  return false;
}