Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
11
/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
12
/// with sequential versions where possible.
13
///
14
//===----------------------------------------------------------------------===//
15
16
#include "AMDGPU.h"
17
#include "AMDGPUSubtarget.h"
18
#include "SIInstrInfo.h"
19
#include "SIMachineFunctionInfo.h"
20
#include "llvm/ADT/Statistic.h"
21
#include "llvm/CodeGen/LiveInterval.h"
22
#include "llvm/CodeGen/LiveIntervals.h"
23
#include "llvm/CodeGen/LiveRegMatrix.h"
24
#include "llvm/CodeGen/MachineFunctionPass.h"
25
#include "llvm/CodeGen/VirtRegMap.h"
26
#include "llvm/Support/MathExtras.h"
27
#include <algorithm>
28
29
using namespace llvm;
30
31
#define DEBUG_TYPE "amdgpu-nsa-reassign"
32
33
STATISTIC(NumNSAInstructions,
34
          "Number of NSA instructions with non-sequential address found");
35
STATISTIC(NumNSAConverted,
36
          "Number of NSA instructions changed to sequential");
37
38
namespace {
39
40
class GCNNSAReassign : public MachineFunctionPass {
41
public:
42
  static char ID;
43
44
2.39k
  GCNNSAReassign() : MachineFunctionPass(ID) {
45
2.39k
    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
46
2.39k
  }
47
48
  bool runOnMachineFunction(MachineFunction &MF) override;
49
50
27.6k
  StringRef getPassName() const override { return "GCN NSA Reassign"; }
51
52
2.37k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
53
2.37k
    AU.addRequired<LiveIntervals>();
54
2.37k
    AU.addRequired<VirtRegMap>();
55
2.37k
    AU.addRequired<LiveRegMatrix>();
56
2.37k
    AU.setPreservesAll();
57
2.37k
    MachineFunctionPass::getAnalysisUsage(AU);
58
2.37k
  }
59
60
private:
61
  typedef enum {
62
    NOT_NSA,        // Not an NSA instruction
63
    FIXED,          // NSA which we cannot modify
64
    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
65
                    // to optimize.
66
    CONTIGUOUS      // NSA with all sequential address registers
67
  } NSA_Status;
68
69
  const GCNSubtarget *ST;
70
71
  const MachineRegisterInfo *MRI;
72
73
  const SIRegisterInfo *TRI;
74
75
  VirtRegMap *VRM;
76
77
  LiveRegMatrix *LRM;
78
79
  LiveIntervals *LIS;
80
81
  unsigned MaxNumVGPRs;
82
83
  const MCPhysReg *CSRegs;
84
85
  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
86
87
  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
88
                          unsigned StartReg) const;
89
90
  bool canAssign(unsigned StartReg, unsigned NumRegs) const;
91
92
  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
93
};
94
95
} // End anonymous namespace.
96
97
101k
INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
98
101k
                      false, false)
99
101k
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
100
101k
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
101
101k
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
102
101k
INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
103
                    false, false)
104
105
106
char GCNNSAReassign::ID = 0;
107
108
char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
109
110
bool
111
GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
112
57
                                   unsigned StartReg) const {
113
57
  unsigned NumRegs = Intervals.size();
114
57
115
234
  for (unsigned N = 0; N < NumRegs; 
++N177
)
116
177
    if (VRM->hasPhys(Intervals[N]->reg))
117
31
      LRM->unassign(*Intervals[N]);
118
57
119
89
  for (unsigned N = 0; N < NumRegs; 
++N32
)
120
79
    if (LRM->checkInterference(*Intervals[N], StartReg + N))
121
47
      return false;
122
57
123
57
  
for (unsigned N = 0; 10
N < NumRegs41
;
++N31
)
124
31
    LRM->assign(*Intervals[N], StartReg + N);
125
10
126
10
  return true;
127
57
}
128
129
57
bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
130
234
  for (unsigned N = 0; N < NumRegs; 
++N177
) {
131
177
    unsigned Reg = StartReg + N;
132
177
    if (!MRI->isAllocatable(Reg))
133
0
      return false;
134
177
135
177
    for (unsigned I = 0; CSRegs[I]; 
++I0
)
136
0
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
137
0
          !LRM->isPhysRegUsed(CSRegs[I]))
138
0
      return false;
139
177
  }
140
57
141
57
  return true;
142
57
}
143
144
bool
145
10
GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
146
10
  unsigned NumRegs = Intervals.size();
147
10
148
10
  if (NumRegs > MaxNumVGPRs)
149
0
    return false;
150
10
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
151
10
152
57
  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; 
++Reg47
) {
153
57
    if (!canAssign(Reg, NumRegs))
154
0
      continue;
155
57
156
57
    if (tryAssignRegisters(Intervals, Reg))
157
10
      return true;
158
57
  }
159
10
160
10
  
return false0
;
161
10
}
162
163
GCNNSAReassign::NSA_Status
164
28.8k
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
165
28.8k
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
166
28.8k
  if (!Info || 
Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA254
)
167
28.7k
    return NSA_Status::NOT_NSA;
168
125
169
125
  int VAddr0Idx =
170
125
    AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
171
125
172
125
  unsigned VgprBase = 0;
173
125
  bool NSA = false;
174
230
  for (unsigned I = 0; I < Info->VAddrDwords; 
++I105
) {
175
200
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
176
200
    unsigned Reg = Op.getReg();
177
200
    if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
178
0
      return NSA_Status::FIXED;
179
200
180
200
    unsigned PhysReg = VRM->getPhys(Reg);
181
200
182
200
    if (!Fast) {
183
153
      if (!PhysReg)
184
0
        return NSA_Status::FIXED;
185
153
186
153
      // Bail if address is not a VGPR32. That should be possible to extend the
187
153
      // optimization to work with subregs of wider register tuples, but the
188
153
      // logic to find free registers will be much more complicated with much
189
153
      // less chance of success. It seems reasonable to assume that in most
190
153
      // cases a tuple is used because a vector variable contains different
191
153
      // parts of an address and it is either already consecutive or cannot
192
153
      // be reassigned if not. If needed it is better to rely on register
193
153
      // coalescer to process such address tuples.
194
153
      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
195
0
        return NSA_Status::FIXED;
196
153
197
153
      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
198
153
199
153
      if (Def && Def->isCopy() && 
Def->getOperand(1).getReg() == PhysReg122
)
200
95
        return NSA_Status::FIXED;
201
58
202
72
      
for (auto U : MRI->use_nodbg_operands(Reg))58
{
203
72
        if (U.isImplicit())
204
0
          return NSA_Status::FIXED;
205
72
        const MachineInstr *UseInst = U.getParent();
206
72
        if (UseInst->isCopy() && 
UseInst->getOperand(0).getReg() == PhysReg0
)
207
0
          return NSA_Status::FIXED;
208
72
      }
209
58
210
58
      if (!LIS->hasInterval(Reg))
211
0
        return NSA_Status::FIXED;
212
105
    }
213
105
214
105
    if (I == 0)
215
32
      VgprBase = PhysReg;
216
73
    else if (VgprBase + I != PhysReg)
217
47
      NSA = true;
218
105
  }
219
125
220
125
  
return NSA 30
?
NSA_Status::NON_CONTIGUOUS23
:
NSA_Status::CONTIGUOUS7
;
221
125
}
222
223
25.2k
bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
224
25.2k
  ST = &MF.getSubtarget<GCNSubtarget>();
225
25.2k
  if (ST->getGeneration() < GCNSubtarget::GFX10)
226
23.1k
    return false;
227
2.10k
228
2.10k
  MRI = &MF.getRegInfo();
229
2.10k
  TRI = ST->getRegisterInfo();
230
2.10k
  VRM = &getAnalysis<VirtRegMap>();
231
2.10k
  LRM = &getAnalysis<LiveRegMatrix>();
232
2.10k
  LIS = &getAnalysis<LiveIntervals>();
233
2.10k
234
2.10k
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
235
2.10k
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
236
2.10k
  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
237
2.10k
  CSRegs = MRI->getCalleeSavedRegs();
238
2.10k
239
2.10k
  using Candidate = std::pair<const MachineInstr*, bool>;
240
2.10k
  SmallVector<Candidate, 32> Candidates;
241
2.52k
  for (const MachineBasicBlock &MBB : MF) {
242
28.8k
    for (const MachineInstr &MI : MBB) {
243
28.8k
      switch (CheckNSA(MI)) {
244
28.8k
      default:
245
28.7k
        continue;
246
28.8k
      case NSA_Status::CONTIGUOUS:
247
5
        Candidates.push_back(std::make_pair(&MI, true));
248
5
        break;
249
28.8k
      case NSA_Status::NON_CONTIGUOUS:
250
11
        Candidates.push_back(std::make_pair(&MI, false));
251
11
        ++NumNSAInstructions;
252
11
        break;
253
28.8k
      }
254
28.8k
    }
255
2.52k
  }
256
2.10k
257
2.10k
  bool Changed = false;
258
2.10k
  for (auto &C : Candidates) {
259
16
    if (C.second)
260
5
      continue;
261
11
262
11
    const MachineInstr *MI = C.first;
263
11
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
264
0
      // Already happens to be fixed.
265
0
      C.second = true;
266
0
      ++NumNSAConverted;
267
0
      continue;
268
0
    }
269
11
270
11
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
271
11
    int VAddr0Idx =
272
11
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
273
11
274
11
    SmallVector<LiveInterval *, 16> Intervals;
275
11
    SmallVector<unsigned, 16> OrigRegs;
276
11
    SlotIndex MinInd, MaxInd;
277
43
    for (unsigned I = 0; I < Info->VAddrDwords; 
++I32
) {
278
33
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
279
33
      unsigned Reg = Op.getReg();
280
33
      LiveInterval *LI = &LIS->getInterval(Reg);
281
33
      if (llvm::find(Intervals, LI) != Intervals.end()) {
282
1
        // Same register used, unable to make sequential
283
1
        Intervals.clear();
284
1
        break;
285
1
      }
286
32
      Intervals.push_back(LI);
287
32
      OrigRegs.push_back(VRM->getPhys(Reg));
288
32
      MinInd = I ? 
std::min(MinInd, LI->beginIndex())21
:
LI->beginIndex()11
;
289
32
      MaxInd = I ? 
std::max(MaxInd, LI->endIndex())21
:
LI->endIndex()11
;
290
32
    }
291
11
292
11
    if (Intervals.empty())
293
1
      continue;
294
10
295
10
    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
296
10
                      << "\tOriginal allocation:\t";
297
10
               for(auto *LI : Intervals)
298
10
                 dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
299
10
               dbgs() << '\n');
300
10
301
10
    bool Success = scavengeRegs(Intervals);
302
10
    if (!Success) {
303
0
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
304
0
      if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
305
0
        continue;
306
10
    } else {
307
10
      // Check we did not make it worse for other instructions.
308
10
      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
309
10
                                [this](const Candidate &C, SlotIndex I) {
310
2
                                  return LIS->getInstructionIndex(*C.first) < I;
311
2
                                });
312
23
      for (auto E = Candidates.end(); Success && 
I != E22
&&
313
23
              
LIS->getInstructionIndex(*I->first) < MaxInd13
;
++I13
) {
314
13
        if (I->second && 
CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS3
) {
315
1
          Success = false;
316
1
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
317
1
        }
318
13
      }
319
10
    }
320
10
321
10
    if (!Success) {
322
4
      for (unsigned I = 0; I < Info->VAddrDwords; 
++I3
)
323
3
        if (VRM->hasPhys(Intervals[I]->reg))
324
3
          LRM->unassign(*Intervals[I]);
325
1
326
4
      for (unsigned I = 0; I < Info->VAddrDwords; 
++I3
)
327
3
        LRM->assign(*Intervals[I], OrigRegs[I]);
328
1
329
1
      continue;
330
1
    }
331
9
332
9
    C.second = true;
333
9
    ++NumNSAConverted;
334
9
    LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
335
9
                 << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
336
9
                 << " : "
337
9
                 << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
338
9
                 << "]\n");
339
9
    Changed = true;
340
9
  }
341
2.10k
342
2.10k
  return Changed;
343
2.10k
}