Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
/// \file
11
/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
12
/// This pass is merging consecutive CFAlus where applicable.
13
/// It needs to be called after IfCvt for best results.
14
//===----------------------------------------------------------------------===//
15
16
#include "AMDGPU.h"
17
#include "AMDGPUSubtarget.h"
18
#include "R600Defines.h"
19
#include "R600InstrInfo.h"
20
#include "R600MachineFunctionInfo.h"
21
#include "R600RegisterInfo.h"
22
#include "llvm/CodeGen/MachineFunctionPass.h"
23
#include "llvm/CodeGen/MachineInstrBuilder.h"
24
#include "llvm/CodeGen/MachineRegisterInfo.h"
25
#include "llvm/Support/Debug.h"
26
#include "llvm/Support/raw_ostream.h"
27
28
using namespace llvm;
29
30
#define DEBUG_TYPE "r600mergeclause"
31
32
namespace {
33
34
123k
static bool isCFAlu(const MachineInstr &MI) {
35
123k
  switch (MI.getOpcode()) {
36
8.68k
  case AMDGPU::CF_ALU:
37
8.68k
  case AMDGPU::CF_ALU_PUSH_BEFORE:
38
8.68k
    return true;
39
114k
  default:
40
114k
    return false;
41
0
  }
42
0
}
43
44
class R600ClauseMergePass : public MachineFunctionPass {
45
46
private:
47
  const R600InstrInfo *TII;
48
49
  unsigned getCFAluSize(const MachineInstr &MI) const;
50
  bool isCFAluEnabled(const MachineInstr &MI) const;
51
52
  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
53
  /// removed and their content affected to the previous alu clause.
54
  /// This function parse instructions after CFAlu until it find a disabled
55
  /// CFAlu and merge the content, or an enabled CFAlu.
56
  void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
57
58
  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
59
  /// it is the case.
60
  bool mergeIfPossible(MachineInstr &RootCFAlu,
61
                       const MachineInstr &LatrCFAlu) const;
62
63
public:
64
  static char ID;
65
66
244
  R600ClauseMergePass() : MachineFunctionPass(ID) { }
67
68
  bool runOnMachineFunction(MachineFunction &MF) override;
69
70
  StringRef getPassName() const override;
71
};
72
73
} // end anonymous namespace
74
75
90.0k
INITIALIZE_PASS_BEGIN90.0k
(R600ClauseMergePass, DEBUG_TYPE,
76
90.0k
                      "R600 Clause Merge", false, false)
77
90.0k
INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
78
                    "R600 Clause Merge", false, false)
79
80
char R600ClauseMergePass::ID = 0;
81
82
char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
83
84
688
unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
85
688
  assert(isCFAlu(MI));
86
688
  return MI
87
688
      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
88
688
      .getImm();
89
688
}
90
91
1.48k
bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
92
1.48k
  assert(isCFAlu(MI));
93
1.48k
  return MI
94
1.48k
      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
95
1.48k
      .getImm();
96
1.48k
}
97
98
void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
99
3.59k
    MachineInstr &CFAlu) const {
100
3.59k
  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
101
3.59k
  MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
102
3.59k
  I++;
103
3.62k
  do {
104
57.4k
    while (
I != E && 57.4k
!isCFAlu(*I)55.3k
)
105
53.8k
      I++;
106
3.62k
    if (I == E)
107
2.14k
      return;
108
1.48k
    MachineInstr &MI = *I++;
109
1.48k
    if (isCFAluEnabled(MI))
110
1.45k
      break;
111
26
    CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
112
26
    MI.eraseFromParent();
113
3.59k
  } while (I != E);
114
3.59k
}
115
116
bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
117
318
                                          const MachineInstr &LatrCFAlu) const {
118
318
  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
119
318
  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
120
318
  unsigned RootInstCount = getCFAluSize(RootCFAlu),
121
318
      LaterInstCount = getCFAluSize(LatrCFAlu);
122
318
  unsigned CumuledInsts = RootInstCount + LaterInstCount;
123
318
  if (
CumuledInsts >= TII->getMaxAlusPerClause()318
) {
124
226
    DEBUG(dbgs() << "Excess inst counts\n");
125
226
    return false;
126
226
  }
127
92
  
if (92
RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE92
)
128
0
    return false;
129
92
  // Is KCache Bank 0 compatible ?
130
92
  int Mode0Idx =
131
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
132
92
  int KBank0Idx =
133
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
134
92
  int KBank0LineIdx =
135
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
136
92
  if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
137
2
      RootCFAlu.getOperand(Mode0Idx).getImm() &&
138
0
      (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
139
0
           RootCFAlu.getOperand(KBank0Idx).getImm() ||
140
0
       LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
141
92
           RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
142
0
    DEBUG(dbgs() << "Wrong KC0\n");
143
0
    return false;
144
0
  }
145
92
  // Is KCache Bank 1 compatible ?
146
92
  int Mode1Idx =
147
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
148
92
  int KBank1Idx =
149
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
150
92
  int KBank1LineIdx =
151
92
      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
152
92
  if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
153
0
      RootCFAlu.getOperand(Mode1Idx).getImm() &&
154
0
      (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
155
0
           RootCFAlu.getOperand(KBank1Idx).getImm() ||
156
0
       LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
157
92
           RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
158
0
    DEBUG(dbgs() << "Wrong KC0\n");
159
0
    return false;
160
0
  }
161
92
  
if (92
LatrCFAlu.getOperand(Mode0Idx).getImm()92
) {
162
2
    RootCFAlu.getOperand(Mode0Idx).setImm(
163
2
        LatrCFAlu.getOperand(Mode0Idx).getImm());
164
2
    RootCFAlu.getOperand(KBank0Idx).setImm(
165
2
        LatrCFAlu.getOperand(KBank0Idx).getImm());
166
2
    RootCFAlu.getOperand(KBank0LineIdx)
167
2
        .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
168
2
  }
169
92
  if (
LatrCFAlu.getOperand(Mode1Idx).getImm()92
) {
170
0
    RootCFAlu.getOperand(Mode1Idx).setImm(
171
0
        LatrCFAlu.getOperand(Mode1Idx).getImm());
172
0
    RootCFAlu.getOperand(KBank1Idx).setImm(
173
0
        LatrCFAlu.getOperand(KBank1Idx).getImm());
174
0
    RootCFAlu.getOperand(KBank1LineIdx)
175
0
        .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
176
0
  }
177
318
  RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
178
318
  RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
179
318
  return true;
180
318
}
181
182
2.05k
/// Walk every basic block of \p MF, fold disabled CF_ALU markers into the
/// marker that precedes them, and merge consecutive enabled CF_ALU markers
/// where mergeIfPossible() allows it.
///
/// \returns true if any instruction was rewritten or erased, false if the
/// function was skipped or left untouched.
bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  TII = ST.getInstrInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    // Most recent CF_ALU marker that later markers may still be merged into;
    // E means there is currently no such marker.
    MachineBasicBlock::iterator LatestCFAlu = E;
    while (I != E) {
      MachineInstr &MI = *I++;
      // An instruction that is neither ALU nor a CF_ALU marker, or one that
      // must end its clause, prevents merging across it.
      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
          TII->mustBeLastInClause(MI.getOpcode()))
        LatestCFAlu = E;
      if (!isCFAlu(MI))
        continue;

      // cleanPotentialDisabledCFAlu() does not report whether it changed
      // anything. It modifies the block exactly when the next CF_ALU marker
      // after MI is disabled, so look ahead for that case here.
      for (MachineBasicBlock::iterator Next = I; Next != E; ++Next) {
        if (!isCFAlu(*Next))
          continue;
        if (!isCFAluEnabled(*Next))
          Changed = true;
        break;
      }
      cleanPotentialDisabledCFAlu(MI);

      // The cleanup may have erased the instruction I was pointing at (a
      // disabled marker directly after MI). MI itself is never erased by it,
      // so recompute the successor from MI instead of using a stale iterator.
      I = MI;
      ++I;

      if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
        MI.eraseFromParent();
        Changed = true;
      } else {
        // Use the named Enabled operand rather than a hard-coded index.
        assert(isCFAluEnabled(MI) && "CF ALU instruction disabled");
        LatestCFAlu = MI;
      }
    }
  }
  return Changed;
}
213
214
244
/// Human-readable name of this pass.
StringRef R600ClauseMergePass::getPassName() const {
  return StringRef("R600 Merge Clause Markers Pass");
}
217
218
244
/// Create a new heap-allocated R600ClauseMergePass and return it through its
/// FunctionPass base.
llvm::FunctionPass *llvm::createR600ClauseMergePass() {
  FunctionPass *Pass = new R600ClauseMergePass();
  return Pass;
}