Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold
11
/// 128 Alu instructions ; these instructions can access up to 4 prefetched
12
/// 4 lines of 16 registers from constant buffers. Such ALU clauses are
13
/// initiated by CF_ALU instructions.
14
//===----------------------------------------------------------------------===//
15
16
#include "AMDGPU.h"
17
#include "AMDGPUSubtarget.h"
18
#include "R600Defines.h"
19
#include "R600InstrInfo.h"
20
#include "R600RegisterInfo.h"
21
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/CodeGen/MachineBasicBlock.h"
25
#include "llvm/CodeGen/MachineFunction.h"
26
#include "llvm/CodeGen/MachineFunctionPass.h"
27
#include "llvm/CodeGen/MachineInstr.h"
28
#include "llvm/CodeGen/MachineInstrBuilder.h"
29
#include "llvm/CodeGen/MachineOperand.h"
30
#include "llvm/Pass.h"
31
#include "llvm/Support/ErrorHandling.h"
32
#include <cassert>
33
#include <cstdint>
34
#include <utility>
35
#include <vector>
36
37
using namespace llvm;
38
39
namespace llvm {
40
41
  void initializeR600EmitClauseMarkersPass(PassRegistry&);
42
43
} // end namespace llvm
44
45
namespace {
46
47
class R600EmitClauseMarkers : public MachineFunctionPass {
48
private:
49
  const R600InstrInfo *TII = nullptr;
50
  int Address = 0;
51
52
49.7k
  unsigned OccupiedDwords(MachineInstr &MI) const {
53
49.7k
    switch (MI.getOpcode()) {
54
49.7k
    case R600::INTERP_PAIR_XY:
55
32
    case R600::INTERP_PAIR_ZW:
56
32
    case R600::INTERP_VEC_LOAD:
57
32
    case R600::DOT_4:
58
32
      return 4;
59
32
    case R600::KILL:
60
0
      return 0;
61
49.7k
    default:
62
49.7k
      break;
63
49.7k
    }
64
49.7k
65
49.7k
    // These will be expanded to two ALU instructions in the
66
49.7k
    // ExpandSpecialInstructions pass.
67
49.7k
    if (TII->isLDSRetInstr(MI.getOpcode()))
68
886
      return 2;
69
48.8k
70
48.8k
    if (TII->isVector(MI) || 
TII->isCubeOp(MI.getOpcode())48.7k
||
71
48.8k
        
TII->isReductionOp(MI.getOpcode())48.7k
)
72
54
      return 4;
73
48.7k
74
48.7k
    unsigned NumLiteral = 0;
75
48.7k
    for (MachineInstr::mop_iterator It = MI.operands_begin(),
76
48.7k
                                    E = MI.operands_end();
77
963k
         It != E; 
++It914k
) {
78
914k
      MachineOperand &MO = *It;
79
914k
      if (MO.isReg() && 
MO.getReg() == R600::ALU_LITERAL_X191k
)
80
21.1k
        ++NumLiteral;
81
914k
    }
82
48.7k
    return 1 + NumLiteral;
83
48.7k
  }
84
85
64.2k
  bool isALU(const MachineInstr &MI) const {
86
64.2k
    if (TII->isALUInstr(MI.getOpcode()))
87
53.2k
      return true;
88
11.0k
    if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
89
3
      return true;
90
11.0k
    switch (MI.getOpcode()) {
91
11.0k
    case R600::PRED_X:
92
298
    case R600::INTERP_PAIR_XY:
93
298
    case R600::INTERP_PAIR_ZW:
94
298
    case R600::INTERP_VEC_LOAD:
95
298
    case R600::COPY:
96
298
    case R600::DOT_4:
97
298
      return true;
98
10.7k
    default:
99
10.7k
      return false;
100
11.0k
    }
101
11.0k
  }
102
103
53.1k
  bool IsTrivialInst(MachineInstr &MI) const {
104
53.1k
    switch (MI.getOpcode()) {
105
53.1k
    case R600::KILL:
106
300
    case R600::RETURN:
107
300
    case R600::IMPLICIT_DEF:
108
300
      return true;
109
52.8k
    default:
110
52.8k
      return false;
111
53.1k
    }
112
53.1k
  }
113
114
12.3k
  std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
115
12.3k
    // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
116
12.3k
    // (See also R600ISelLowering.cpp)
117
12.3k
    // ConstIndex value is in [0, 4095];
118
12.3k
    return std::pair<unsigned, unsigned>(
119
12.3k
        ((Sel >> 2) - 512) >> 12, // KC_BANK
120
12.3k
        // Line Number of ConstIndex
121
12.3k
        // A line contains 16 constant registers however KCX bank can lock
122
12.3k
        // two line at the same time ; thus we want to get an even line number.
123
12.3k
        // Line number can be retrieved with (>>4), using (>>5) <<1 generates
124
12.3k
        // an even number.
125
12.3k
        ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
126
12.3k
  }
127
128
  bool
129
  SubstituteKCacheBank(MachineInstr &MI,
130
                       std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
131
49.7k
                       bool UpdateInstr = true) const {
132
49.7k
    std::vector<std::pair<unsigned, unsigned>> UsedKCache;
133
49.7k
134
49.7k
    if (!TII->isALUInstr(MI.getOpcode()) && 
MI.getOpcode() != R600::DOT_434
)
135
2
      return true;
136
49.7k
137
49.7k
    const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
138
49.7k
        TII->getSrcs(MI);
139
49.7k
    assert(
140
49.7k
        (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
141
49.7k
        "Can't assign Const");
142
146k
    for (unsigned i = 0, n = Consts.size(); i < n; 
++i96.7k
) {
143
96.7k
      if (Consts[i].first->getReg() != R600::ALU_CONST)
144
84.3k
        continue;
145
12.3k
      unsigned Sel = Consts[i].second;
146
12.3k
      unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
147
12.3k
      unsigned KCacheIndex = Index * 4 + Chan;
148
12.3k
      const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
149
12.3k
      if (CachedConsts.empty()) {
150
3.29k
        CachedConsts.push_back(BankLine);
151
3.29k
        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
152
3.29k
        continue;
153
3.29k
      }
154
9.02k
      if (CachedConsts[0] == BankLine) {
155
9.02k
        UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
156
9.02k
        continue;
157
9.02k
      }
158
0
      if (CachedConsts.size() == 1) {
159
0
        CachedConsts.push_back(BankLine);
160
0
        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
161
0
        continue;
162
0
      }
163
0
      if (CachedConsts[1] == BankLine) {
164
0
        UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
165
0
        continue;
166
0
      }
167
0
      return false;
168
0
    }
169
49.7k
170
49.7k
    if (!UpdateInstr)
171
584
      return true;
172
49.1k
173
145k
    
for (unsigned i = 0, j = 0, n = Consts.size(); 49.1k
i < n;
++i96.1k
) {
174
96.1k
      if (Consts[i].first->getReg() != R600::ALU_CONST)
175
83.7k
        continue;
176
12.3k
      switch(UsedKCache[j].first) {
177
12.3k
      case 0:
178
12.3k
        Consts[i].first->setReg(
179
12.3k
            R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
180
12.3k
        break;
181
12.3k
      case 1:
182
0
        Consts[i].first->setReg(
183
0
            R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
184
0
        break;
185
12.3k
      default:
186
0
        llvm_unreachable("Wrong Cache Line");
187
12.3k
      }
188
12.3k
      j++;
189
12.3k
    }
190
49.1k
    return true;
191
49.1k
  }
192
193
  bool canClauseLocalKillFitInClause(
194
                        unsigned AluInstCount,
195
                        std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
196
                        MachineBasicBlock::iterator Def,
197
49.1k
                        MachineBasicBlock::iterator BBEnd) {
198
49.1k
    const R600RegisterInfo &TRI = TII->getRegisterInfo();
199
49.1k
    //TODO: change this to defs?
200
49.1k
    for (MachineInstr::const_mop_iterator
201
49.1k
           MOI = Def->operands_begin(),
202
960k
           MOE = Def->operands_end(); MOI != MOE; 
++MOI911k
) {
203
911k
      if (!MOI->isReg() || 
!MOI->isDef()191k
||
204
911k
          
TRI.isPhysRegLiveAcrossClauses(MOI->getReg())46.5k
)
205
911k
        continue;
206
292
207
292
      // Def defines a clause local register, so check that its use will fit
208
292
      // in the clause.
209
292
      unsigned LastUseCount = 0;
210
584
      for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; 
++UseI292
) {
211
584
        AluInstCount += OccupiedDwords(*UseI);
212
584
        // Make sure we won't need to end the clause due to KCache limitations.
213
584
        if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
214
0
          return false;
215
584
216
584
        // We have reached the maximum instruction limit before finding the
217
584
        // use that kills this register, so we cannot use this def in the
218
584
        // current clause.
219
584
        if (AluInstCount >= TII->getMaxAlusPerClause())
220
0
          return false;
221
584
222
584
        // TODO: Is this true? kill flag appears to work OK below
223
584
        // Register kill flags have been cleared by the time we get to this
224
584
        // pass, but it is safe to assume that all uses of this register
225
584
        // occur in the same basic block as its definition, because
226
584
        // it is illegal for the scheduler to schedule them in
227
584
        // different blocks.
228
584
        if (UseI->readsRegister(MOI->getReg(), &TRI))
229
292
          LastUseCount = AluInstCount;
230
584
231
584
        // Exit early if the current use kills the register
232
584
        if (UseI != Def && 
UseI->killsRegister(MOI->getReg(), &TRI)292
)
233
292
          break;
234
584
      }
235
292
      if (LastUseCount)
236
292
        return LastUseCount <= TII->getMaxAlusPerClause();
237
0
      llvm_unreachable("Clause local register live at end of clause.");
238
0
    }
239
49.1k
    
return true48.8k
;
240
49.1k
  }
241
242
  MachineBasicBlock::iterator
243
3.94k
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
244
3.94k
    MachineBasicBlock::iterator ClauseHead = I;
245
3.94k
    std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
246
3.94k
    bool PushBeforeModifier = false;
247
3.94k
    unsigned AluInstCount = 0;
248
53.4k
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; 
++I49.5k
) {
249
53.1k
      if (IsTrivialInst(*I))
250
300
        continue;
251
52.8k
      if (!isALU(*I))
252
3.29k
        break;
253
49.5k
      if (AluInstCount > TII->getMaxAlusPerClause())
254
228
        break;
255
49.3k
      if (I->getOpcode() == R600::PRED_X) {
256
173
        // We put PRED_X in its own clause to ensure that ifcvt won't create
257
173
        // clauses with more than 128 insts.
258
173
        // IfCvt is indeed checking that "then" and "else" branches of an if
259
173
        // statement have less than ~60 insts thus converted clauses can't be
260
173
        // bigger than ~121 insts (predicate setter needs to be in the same
261
173
        // clause as predicated alus).
262
173
        if (AluInstCount > 0)
263
85
          break;
264
88
        if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
265
88
          PushBeforeModifier = true;
266
88
        AluInstCount ++;
267
88
        continue;
268
88
      }
269
49.1k
      // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
270
49.1k
      //
271
49.1k
      // * KILL or INTERP instructions
272
49.1k
      // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
273
49.1k
      // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
274
49.1k
      //
275
49.1k
      // XXX: These checks have not been implemented yet.
276
49.1k
      if (TII->mustBeLastInClause(I->getOpcode())) {
277
4
        I++;
278
4
        break;
279
4
      }
280
49.1k
281
49.1k
      // If this instruction defines a clause local register, make sure
282
49.1k
      // its use can fit in this clause.
283
49.1k
      if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
284
0
        break;
285
49.1k
286
49.1k
      if (!SubstituteKCacheBank(*I, KCacheBanks))
287
0
        break;
288
49.1k
      AluInstCount += OccupiedDwords(*I);
289
49.1k
    }
290
3.94k
    unsigned Opcode = PushBeforeModifier ?
291
3.85k
        
R600::CF_ALU_PUSH_BEFORE88
: R600::CF_ALU;
292
3.94k
    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
293
3.94k
    // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
294
3.94k
    // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
295
3.94k
    // pass may assume that identical ALU clause starter at the beginning of a
296
3.94k
    // true and false branch can be factorized which is not the case.
297
3.94k
        .addImm(Address++) // ADDR
298
3.94k
        .addImm(KCacheBanks.empty()?
0651
:
KCacheBanks[0].first3.29k
) // KB0
299
3.94k
        .addImm((KCacheBanks.size() < 2)?0:
KCacheBanks[1].first0
) // KB1
300
3.94k
        .addImm(KCacheBanks.empty()?
0651
:
23.29k
) // KM0
301
3.94k
        .addImm((KCacheBanks.size() < 2)?0:
20
) // KM1
302
3.94k
        .addImm(KCacheBanks.empty()?
0651
:
KCacheBanks[0].second3.29k
) // KLINE0
303
3.94k
        .addImm((KCacheBanks.size() < 2)?0:
KCacheBanks[1].second0
) // KLINE1
304
3.94k
        .addImm(AluInstCount) // COUNT
305
3.94k
        .addImm(1); // Enabled
306
3.94k
    return I;
307
3.94k
  }
308
309
public:
310
  static char ID;
311
312
280
  R600EmitClauseMarkers() : MachineFunctionPass(ID) {
313
280
    initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
314
280
  }
315
316
2.29k
  bool runOnMachineFunction(MachineFunction &MF) override {
317
2.29k
    const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
318
2.29k
    TII = ST.getInstrInfo();
319
2.29k
320
2.29k
    for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
321
4.75k
                                                    BB != BB_E; 
++BB2.46k
) {
322
2.46k
      MachineBasicBlock &MBB = *BB;
323
2.46k
      MachineBasicBlock::iterator I = MBB.begin();
324
2.46k
      if (I != MBB.end() && 
I->getOpcode() == R600::CF_ALU2.46k
)
325
0
        continue; // BB was already parsed
326
13.8k
      
for (MachineBasicBlock::iterator E = MBB.end(); 2.46k
I != E;) {
327
11.3k
        if (isALU(*I)) {
328
3.94k
          auto next = MakeALUClause(MBB, I);
329
3.94k
          assert(next != I);
330
3.94k
          I = next;
331
3.94k
        } else
332
7.44k
          ++I;
333
11.3k
      }
334
2.46k
    }
335
2.29k
    return false;
336
2.29k
  }
337
338
2.57k
  StringRef getPassName() const override {
339
2.57k
    return "R600 Emit Clause Markers Pass";
340
2.57k
  }
341
};
342
343
char R600EmitClauseMarkers::ID = 0;
344
345
} // end anonymous namespace
346
347
280
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
348
280
                      "R600 Emit Clause Markters", false, false)
349
280
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
350
                      "R600 Emit Clause Markters", false, false)
351
352
280
FunctionPass *llvm::createR600EmitClauseMarkers() {
353
280
  return new R600EmitClauseMarkers();
354
280
}