Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10
// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11
//
12
// This pass must never create new SGPR virtual registers.
13
//
14
// FIXME: Must stop RegScavenger spills in later passes.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "AMDGPU.h"
19
#include "AMDGPUSubtarget.h"
20
#include "SIInstrInfo.h"
21
#include "SIMachineFunctionInfo.h"
22
#include "llvm/CodeGen/LiveIntervals.h"
23
#include "llvm/CodeGen/MachineBasicBlock.h"
24
#include "llvm/CodeGen/MachineFunction.h"
25
#include "llvm/CodeGen/MachineFunctionPass.h"
26
#include "llvm/CodeGen/MachineInstr.h"
27
#include "llvm/CodeGen/MachineInstrBuilder.h"
28
#include "llvm/CodeGen/MachineOperand.h"
29
#include "llvm/CodeGen/VirtRegMap.h"
30
#include "llvm/Target/TargetMachine.h"
31
32
using namespace llvm;
33
34
#define DEBUG_TYPE "si-lower-sgpr-spills"
35
36
// Small vector of basic-block pointers; the pass uses it for its lists of
// save blocks and restore blocks.
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
37
38
namespace {
39
40
// Hidden command-line switch, enabled by default. runOnMachineFunction()
// combines it with subtarget and per-function queries to decide whether VGPR
// spills are redirected to AGPRs.
static cl::opt<bool> EnableSpillVGPRToAGPR(
41
  "amdgpu-spill-vgpr-to-agpr",
42
  cl::desc("Enable spilling VGPRs to AGPRs"),
43
  cl::ReallyHidden,
44
  cl::init(true));
45
46
// Machine function pass that inserts save/restore code for callee-saved SGPRs
// and then rewrites SGPR spill instructions to target VGPRs (and, when
// enabled, VGPR spill instructions to target AGPRs).
class SILowerSGPRSpills : public MachineFunctionPass {
47
private:
48
  // TRI and TII are refreshed from the subtarget at the start of every
  // runOnMachineFunction() call.
  const SIRegisterInfo *TRI = nullptr;
49
  const SIInstrInfo *TII = nullptr;
50
  // Fetched with getAnalysisIfAvailable<VirtRegMap>(), so it may be null.
  VirtRegMap *VRM = nullptr;
51
  // NOTE(review): no assignment to LIS is visible in this file, so it appears
  // to stay null and the LIS-guarded code in the CSR save/restore helpers
  // would never run — confirm whether that is intended.
  LiveIntervals *LIS = nullptr;
52
53
  // Save and Restore blocks of the current function. Typically there is a
54
  // single save block, unless Windows EH funclets are involved.
55
  MBBVector SaveBlocks;
56
  MBBVector RestoreBlocks;
57
58
public:
59
  static char ID;
60
61
2.44k
  SILowerSGPRSpills() : MachineFunctionPass(ID) {}
62
63
  // Fills SaveBlocks and RestoreBlocks for MF.
  void calculateSaveRestoreBlocks(MachineFunction &MF);
64
  // Inserts callee-saved SGPR saves/restores; returns true if any were added.
  bool spillCalleeSavedRegs(MachineFunction &MF);
65
66
  bool runOnMachineFunction(MachineFunction &MF) override;
67
68
2.41k
  // Declares that every analysis is preserved, then defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
69
2.41k
    AU.setPreservesAll();
70
2.41k
    MachineFunctionPass::getAnalysisUsage(AU);
71
2.41k
  }
72
};
73
74
} // end anonymous namespace
75
76
// Storage for the pass ID that the SILowerSGPRSpills constructor hands to the
// MachineFunctionPass base class.
char SILowerSGPRSpills::ID = 0;
77
78
101k
// Register the pass under the DEBUG_TYPE name and declare VirtRegMap as a
// pass dependency.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
79
101k
                      "SI lower SGPR spill instructions", false, false)
80
101k
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
81
101k
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
82
                    "SI lower SGPR spill instructions", false, false)
83
84
// Reference to the pass ID in namespace llvm; the pass class itself lives in
// an anonymous namespace.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
85
86
/// Insert spill code for the callee-saved registers used in the function.
///
/// The target frame lowering gets the first chance through
/// spillCalleeSavedRegisters(). If it returns false, one storeRegToStackSlot()
/// is emitted per entry of \p CSI at the iterator taken from
/// \p SaveBlock.begin().
///
/// \param SaveBlock block that receives the spill code.
/// \param CSI       registers to save, each paired with its frame index.
/// \param LIS       optional; when non-null, each emitted store is passed to
///                  InsertMachineInstrInMaps() and
///                  removeAllRegUnitsForPhysReg() is called for the register.
87
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
88
                           ArrayRef<CalleeSavedInfo> CSI,
89
47
                           LiveIntervals *LIS) {
90
47
  MachineFunction &MF = *SaveBlock.getParent();
91
47
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
92
47
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
93
47
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
94
47
95
47
  MachineBasicBlock::iterator I = SaveBlock.begin();
96
47
  // Fall back to generic per-register stores only when the target hook
  // reports (by returning false) that it did not handle the spills.
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
97
444
    for (const CalleeSavedInfo &CS : CSI) {
98
444
      // Insert the spill to the stack frame.
99
444
      unsigned Reg = CS.getReg();
100
444
101
444
      // MIS is used below to check how many instructions the store added.
      MachineInstrSpan MIS(I, &SaveBlock);
102
444
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
103
444
104
444
      // The literal `true` is presumably the isKill flag — confirm against
      // TargetInstrInfo::storeRegToStackSlot.
      TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
105
444
                              TRI);
106
444
107
444
      if (LIS) {
108
0
        // Exactly one new instruction is expected, located just before I.
        assert(std::distance(MIS.begin(), I) == 1);
109
0
        MachineInstr &Inst = *std::prev(I);
110
0
111
0
        LIS->InsertMachineInstrInMaps(Inst);
112
0
        LIS->removeAllRegUnitsForPhysReg(Reg);
113
0
      }
114
444
    }
115
47
  }
116
47
}
117
118
/// Insert restore code for the callee-saved registers used in the function.
///
/// The target frame lowering gets the first chance through
/// restoreCalleeSavedRegisters(). If it returns false, one
/// loadRegFromStackSlot() is emitted per entry of \p CSI, walking \p CSI in
/// reverse, at the first terminator of \p RestoreBlock.
///
/// \param RestoreBlock block that receives the reload code.
/// \param CSI          registers to restore, each paired with its frame index.
/// \param LIS          optional; when non-null, the instruction just before
///                     the insertion point is passed to
///                     InsertMachineInstrInMaps() and
///                     removeAllRegUnitsForPhysReg() is called for the
///                     register.
119
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
120
                              std::vector<CalleeSavedInfo> &CSI,
121
47
                              LiveIntervals *LIS) {
122
47
  MachineFunction &MF = *RestoreBlock.getParent();
123
47
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
124
47
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
125
47
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
126
47
127
47
  // Restore all registers immediately before the return and any
128
47
  // terminators that precede it.
129
47
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
130
47
131
47
  // FIXME: Just emit the readlane/writelane directly
132
47
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
133
444
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
134
444
      unsigned Reg = CI.getReg();
135
444
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
136
444
137
444
      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
138
444
      assert(I != RestoreBlock.begin() &&
139
444
             "loadRegFromStackSlot didn't insert any code!");
140
444
      // Insert in reverse order.  loadRegFromStackSlot can insert
141
444
      // multiple instructions.
142
444
143
444
      // NOTE(review): only the instruction directly before I is registered
      // with LIS here, although the comment above says several instructions
      // may have been inserted — confirm this is sufficient.
      if (LIS) {
144
0
        MachineInstr &Inst = *std::prev(I);
145
0
        LIS->InsertMachineInstrInMaps(Inst);
146
0
        LIS->removeAllRegUnitsForPhysReg(Reg);
147
0
      }
148
444
    }
149
47
  }
150
47
}
151
152
/// Compute the sets of entry and return blocks for saving and restoring
153
/// callee-saved registers, and placing prolog and epilog code.
///
/// Results are appended to the SaveBlocks and RestoreBlocks members.
/// runOnMachineFunction() asserts both are empty before calling this and
/// clears them before it returns.
154
25.4k
void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
155
25.4k
  const MachineFrameInfo &MFI = MF.getFrameInfo();
156
25.4k
157
25.4k
  // Even when we do not change any CSR, we still want to insert the
158
25.4k
  // prologue and epilogue of the function.
159
25.4k
  // So set the save points for those.
160
25.4k
161
25.4k
  // Use the points found by shrink-wrapping, if any.
162
25.4k
  if (MFI.getSavePoint()) {
163
0
    SaveBlocks.push_back(MFI.getSavePoint());
164
0
    assert(MFI.getRestorePoint() && "Both restore and save must be set");
165
0
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
166
0
    // If RestoreBlock does not have any successor and is not a return block
167
0
    // then the end point is unreachable and we do not need to insert any
168
0
    // epilogue.
169
0
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
170
0
      RestoreBlocks.push_back(RestoreBlock);
171
0
    // A recorded save point replaces the default entry/return scan below.
    return;
172
0
  }
173
25.4k
174
25.4k
  // Save refs to entry and return blocks.
175
25.4k
  SaveBlocks.push_back(&MF.front());
176
29.1k
  for (MachineBasicBlock &MBB : MF) {
177
29.1k
    // EH funclet entries are additional save points besides the entry block.
    if (MBB.isEHFuncletEntry())
178
0
      SaveBlocks.push_back(&MBB);
179
29.1k
    if (MBB.isReturnBlock())
180
25.4k
      RestoreBlocks.push_back(&MBB);
181
29.1k
  }
182
25.4k
}
183
184
25.4k
/// Select the callee-saved SGPRs that need saving, create a stack object for
/// each, and insert saves into every block of SaveBlocks and restores into
/// every block of RestoreBlocks.
///
/// Uses the TRI member and the SaveBlocks/RestoreBlocks members, so
/// runOnMachineFunction() sets TRI and calls calculateSaveRestoreBlocks()
/// first.
///
/// \returns true if at least one register was selected and save/restore code
/// was inserted; false for functions with the Naked attribute or when no
/// register was selected.
bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
185
25.4k
  MachineRegisterInfo &MRI = MF.getRegInfo();
186
25.4k
  const Function &F = MF.getFunction();
187
25.4k
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
188
25.4k
  const SIFrameLowering *TFI = ST.getFrameLowering();
189
25.4k
  MachineFrameInfo &MFI = MF.getFrameInfo();
190
25.4k
  // No register scavenger is supplied to the frame lowering query below.
  RegScavenger *RS = nullptr;
191
25.4k
192
25.4k
  // Determine which of the registers in the callee save list should be saved.
193
25.4k
  BitVector SavedRegs;
194
25.4k
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
195
25.4k
196
25.4k
  // Add the code to save and restore the callee saved registers.
197
25.4k
  if (!F.hasFnAttribute(Attribute::Naked)) {
198
25.4k
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
199
25.4k
    // necessary for verifier liveness checks.
200
25.4k
    MFI.setCalleeSavedInfoValid(true);
201
25.4k
202
25.4k
    std::vector<CalleeSavedInfo> CSI;
203
25.4k
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
204
25.4k
205
698k
    // The list is walked until the first zero entry, which ends the loop.
    for (unsigned I = 0; CSRegs[I]; 
++I672k
) {
206
672k
      unsigned Reg = CSRegs[I];
207
672k
      if (SavedRegs.test(Reg)) {
208
444
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
209
444
        // The trailing `true` is presumably the isSpillSlot flag — confirm
        // against MachineFrameInfo::CreateStackObject.
        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
210
444
                                           TRI->getSpillAlignment(*RC),
211
444
                                           true);
212
444
213
444
        CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
214
444
      }
215
672k
    }
216
25.4k
217
25.4k
    if (!CSI.empty()) {
218
47
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
219
47
        insertCSRSaves(*SaveBlock, CSI, LIS);
220
47
221
47
      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
222
47
        insertCSRRestores(*RestoreBlock, CSI, LIS);
223
47
      return true;
224
47
    }
225
25.3k
  }
226
25.3k
227
25.3k
  return false;
228
25.3k
}
229
230
25.4k
/// Pass entry point. Inserts callee-saved SGPR save/restore code, then walks
/// every instruction to rewrite SGPR spills to VGPRs (and, when enabled, VGPR
/// spills to AGPRs), and finally adds the registers used for those spills as
/// live-ins of every block.
///
/// \returns true only when the spill-rewriting section ran; see the review
/// note at MadeChange below.
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
231
25.4k
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
232
25.4k
  TII = ST.getInstrInfo();
233
25.4k
  TRI = &TII->getRegisterInfo();
234
25.4k
235
25.4k
  // VRM is stored but not read anywhere else in the code visible here.
  VRM = getAnalysisIfAvailable<VirtRegMap>();
236
25.4k
237
25.4k
  // Every return path below clears both lists again.
  assert(SaveBlocks.empty() && RestoreBlocks.empty());
238
25.4k
239
25.4k
  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
240
25.4k
  // does, but somewhat simpler.
241
25.4k
  calculateSaveRestoreBlocks(MF);
242
25.4k
  bool HasCSRs = spillCalleeSavedRegs(MF);
243
25.4k
244
25.4k
  MachineFrameInfo &MFI = MF.getFrameInfo();
245
25.4k
  // Nothing to lower: no stack objects exist and no CSR code was inserted.
  if (!MFI.hasStackObjects() && 
!HasCSRs24.4k
) {
246
24.4k
    SaveBlocks.clear();
247
24.4k
    RestoreBlocks.clear();
248
24.4k
    return false;
249
24.4k
  }
250
968
251
968
  MachineRegisterInfo &MRI = MF.getRegInfo();
252
968
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
253
968
  // Controls whether the rewriting section below is entered for VGPR spills;
  // unlike SpillToAGPR it also requires that the function has spilled VGPRs.
  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && 
FuncInfo->hasSpilledVGPRs()14
254
968
    && 
EnableSpillVGPRToAGPR14
;
255
968
256
968
  // NOTE(review): when spillCalleeSavedRegs() returned true it has already
  // inserted save/restore code, yet MadeChange stays false unless the block
  // below runs — confirm whether the result should also reflect HasCSRs.
  bool MadeChange = false;
257
968
258
968
  // Per-instruction gate used inside the loop for the VGPR-to-AGPR rewrite.
  const bool SpillToAGPR = EnableSpillVGPRToAGPR && 
ST.hasMAIInsts()964
;
259
968
260
968
  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
261
968
  // handled as SpilledToReg in regular PrologEpilogInserter.
262
968
  if ((TRI->spillSGPRToVGPR() && 
(941
HasCSRs941
||
FuncInfo->hasSpilledSGPRs()894
)) ||
263
968
      
SpillVGPRToAGPR770
) {
264
208
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
265
208
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
266
208
    //
267
208
    // This operates under the assumption that only other SGPR spills are users
268
208
    // of the frame index.
269
422
    for (MachineBasicBlock &MBB : MF) {
270
422
      // The successor is captured before MI is processed, presumably because
      // the elimination calls below may erase or replace MI.
      MachineBasicBlock::iterator Next;
271
8.66k
      for (auto I = MBB.begin(), E = MBB.end(); I != E; 
I = Next8.23k
) {
272
8.23k
        MachineInstr &MI = *I;
273
8.23k
        Next = std::next(I);
274
8.23k
275
8.23k
        if (SpillToAGPR && 
TII->isVGPRSpill(MI)512
) {
276
91
          // Try to eliminate stack used by VGPR spills before frame
277
91
          // finalization.
278
91
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
279
91
                                                     AMDGPU::OpName::vaddr);
280
91
          int FI = MI.getOperand(FIOp).getIndex();
281
91
          unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
282
91
            ->getReg();
283
91
          // On a failed allocation the spill is left as is and control falls
          // through to the SGPR-spill check below.
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
284
91
                                                TRI->isAGPR(MRI, VReg))) {
285
73
            TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
286
73
            continue;
287
73
          }
288
8.16k
        }
289
8.16k
290
8.16k
        if (!TII->isSGPRSpill(MI))
291
6.51k
          continue;
292
1.65k
293
1.65k
        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
294
1.65k
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
295
1.65k
        if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
296
1.65k
          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
297
1.65k
          // Spilled is only read by the assert; the cast keeps it referenced
          // when the assert is compiled out.
          (void)Spilled;
298
1.65k
          assert(Spilled && "failed to spill SGPR to VGPR when allocated");
299
1.65k
        }
300
1.65k
      }
301
422
    }
302
208
303
422
    // Add every register now used to hold spills as a live-in of every block.
    for (MachineBasicBlock &MBB : MF) {
304
422
      for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
305
442
        MBB.addLiveIn(SSpill.VGPR);
306
422
307
422
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
308
586
        MBB.addLiveIn(Reg);
309
422
310
422
      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
311
23
        MBB.addLiveIn(Reg);
312
422
313
422
      MBB.sortUniqueLiveIns();
314
422
    }
315
208
316
208
    MadeChange = true;
317
208
  }
318
968
319
968
  // Reset per-function state so the entry assert holds on the next run.
  SaveBlocks.clear();
320
968
  RestoreBlocks.clear();
321
968
322
968
  return MadeChange;
323
968
}