Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This pass implements instruction packetization for R600. It unsets isLast
11
/// bit of instructions inside a bundle and substitutes src register with
12
/// PreviousVector when applicable.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "AMDGPU.h"
17
#include "AMDGPUSubtarget.h"
18
#include "R600InstrInfo.h"
19
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20
#include "llvm/CodeGen/DFAPacketizer.h"
21
#include "llvm/CodeGen/MachineDominators.h"
22
#include "llvm/CodeGen/MachineFunctionPass.h"
23
#include "llvm/CodeGen/MachineLoopInfo.h"
24
#include "llvm/CodeGen/Passes.h"
25
#include "llvm/CodeGen/ScheduleDAG.h"
26
#include "llvm/Support/Debug.h"
27
#include "llvm/Support/raw_ostream.h"
28
29
using namespace llvm;
30
31
#define DEBUG_TYPE "packets"
32
33
namespace {
34
35
class R600Packetizer : public MachineFunctionPass {
36
37
public:
38
  static char ID;
39
280
  R600Packetizer() : MachineFunctionPass(ID) {}
40
41
280
  void getAnalysisUsage(AnalysisUsage &AU) const override {
42
280
    AU.setPreservesCFG();
43
280
    AU.addRequired<MachineDominatorTree>();
44
280
    AU.addPreserved<MachineDominatorTree>();
45
280
    AU.addRequired<MachineLoopInfo>();
46
280
    AU.addPreserved<MachineLoopInfo>();
47
280
    MachineFunctionPass::getAnalysisUsage(AU);
48
280
  }
49
50
2.57k
  StringRef getPassName() const override { return "R600 Packetizer"; }
51
52
  bool runOnMachineFunction(MachineFunction &Fn) override;
53
};
54
55
class R600PacketizerList : public VLIWPacketizerList {
56
private:
57
  const R600InstrInfo *TII;
58
  const R600RegisterInfo &TRI;
59
  bool VLIW5;
60
  bool ConsideredInstUsesAlreadyWrittenVectorElement;
61
62
263k
  unsigned getSlot(const MachineInstr &MI) const {
63
263k
    return TRI.getHWRegChan(MI.getOperand(0).getReg());
64
263k
  }
65
66
  /// \returns register to PV chan mapping for bundle/single instructions that
67
  /// immediately precedes I.
68
  DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
69
46.7k
      const {
70
46.7k
    DenseMap<unsigned, unsigned> Result;
71
46.7k
    I--;
72
46.7k
    if (!TII->isALUInstr(I->getOpcode()) && 
!I->isBundle()31.5k
)
73
5.75k
      return Result;
74
40.9k
    MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
75
40.9k
    if (I->isBundle())
76
25.7k
      BI++;
77
40.9k
    int LastDstChan = -1;
78
101k
    do {
79
101k
      bool isTrans = false;
80
101k
      int BISlot = getSlot(*BI);
81
101k
      if (LastDstChan >= BISlot)
82
18.1k
        isTrans = true;
83
101k
      LastDstChan = BISlot;
84
101k
      if (TII->isPredicated(*BI))
85
442
        continue;
86
100k
      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
87
100k
      if (OperandIdx > -1 && 
BI->getOperand(OperandIdx).getImm() == 075.2k
)
88
607
        continue;
89
100k
      int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
90
100k
      if (DstIdx == -1) {
91
4.18k
        continue;
92
4.18k
      }
93
96.0k
      unsigned Dst = BI->getOperand(DstIdx).getReg();
94
96.0k
      if (isTrans || 
TII->isTransOnly(*BI)77.9k
) {
95
18.8k
        Result[Dst] = R600::PS;
96
18.8k
        continue;
97
18.8k
      }
98
77.1k
      if (BI->getOpcode() == R600::DOT4_r600 ||
99
77.1k
          
BI->getOpcode() == R600::DOT4_eg77.1k
) {
100
24
        Result[Dst] = R600::PV_X;
101
24
        continue;
102
24
      }
103
77.1k
      if (Dst == R600::OQAP) {
104
1.67k
        continue;
105
1.67k
      }
106
75.4k
      unsigned PVReg = 0;
107
75.4k
      switch (TRI.getHWRegChan(Dst)) {
108
75.4k
      case 0:
109
15.4k
        PVReg = R600::PV_X;
110
15.4k
        break;
111
75.4k
      case 1:
112
14.7k
        PVReg = R600::PV_Y;
113
14.7k
        break;
114
75.4k
      case 2:
115
16.0k
        PVReg = R600::PV_Z;
116
16.0k
        break;
117
75.4k
      case 3:
118
29.2k
        PVReg = R600::PV_W;
119
29.2k
        break;
120
75.4k
      default:
121
0
        llvm_unreachable("Invalid Chan");
122
75.4k
      }
123
75.4k
      Result[Dst] = PVReg;
124
101k
    } while ((++BI)->isBundledWithPred());
125
40.9k
    return Result;
126
40.9k
  }
127
128
  /// Rewrites every src0/src1/src2 register operand of \p MI that has an entry
  /// in \p PVs so that it uses the register recorded there instead.
  void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
      const {
    const unsigned SrcNames[] = {R600::OpName::src0, R600::OpName::src1,
                                 R600::OpName::src2};

    for (unsigned SrcName : SrcNames) {
      const int SrcIdx = TII->getOperandIdx(MI.getOpcode(), SrcName);
      // The opcode has no such source operand.
      if (SrcIdx < 0)
        continue;

      MachineOperand &SrcOp = MI.getOperand(SrcIdx);
      const unsigned SrcReg = SrcOp.getReg();
      auto Found = PVs.find(SrcReg);
      if (Found != PVs.end())
        SrcOp.setReg(Found->second);
    }
  }
145
public:
146
  // Ctor.
147
  R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
148
                     MachineLoopInfo &MLI)
149
      : VLIWPacketizerList(MF, MLI, nullptr),
150
        TII(ST.getInstrInfo()),
151
2.29k
        TRI(TII->getRegisterInfo()) {
152
2.29k
    VLIW5 = !ST.hasCaymanISA();
153
2.29k
  }
154
155
  // initPacketizerState - initialize some internal flags.
156
61.8k
  void initPacketizerState() override {
157
61.8k
    ConsideredInstUsesAlreadyWrittenVectorElement = false;
158
61.8k
  }
159
160
  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
161
  bool ignorePseudoInstruction(const MachineInstr &MI,
162
46.7k
                               const MachineBasicBlock *MBB) override {
163
46.7k
    return false;
164
46.7k
  }
165
166
  // isSoloInstruction - return true if instruction MI can not be packetized
167
  // with any other instruction, which means that MI itself is a packet.
168
61.8k
  bool isSoloInstruction(const MachineInstr &MI) override {
169
61.8k
    if (TII->isVector(MI))
170
0
      return true;
171
61.8k
    if (!TII->isALUInstr(MI.getOpcode()))
172
11.6k
      return true;
173
50.1k
    if (MI.getOpcode() == R600::GROUP_BARRIER)
174
4
      return true;
175
50.1k
    // XXX: This can be removed once the packetizer properly handles all the
176
50.1k
    // LDS instruction group restrictions.
177
50.1k
    return TII->isLDSInstr(MI.getOpcode());
178
50.1k
  }
179
180
  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
181
  // together.
182
54.2k
  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
183
54.2k
    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
184
54.2k
    if (getSlot(*MII) == getSlot(*MIJ))
185
12.0k
      ConsideredInstUsesAlreadyWrittenVectorElement = true;
186
54.2k
    // Does MII and MIJ share the same pred_sel ?
187
54.2k
    int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
188
54.2k
        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
189
54.2k
    Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : 
Register()0
,
190
54.2k
      PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : 
Register()0
;
191
54.2k
    if (PredI != PredJ)
192
36
      return false;
193
54.2k
    if (SUJ->isSucc(SUI)) {
194
38.3k
      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; 
++i27.0k
) {
195
32.4k
        const SDep &Dep = SUJ->Succs[i];
196
32.4k
        if (Dep.getSUnit() != SUI)
197
21.1k
          continue;
198
11.2k
        if (Dep.getKind() == SDep::Anti)
199
5.89k
          continue;
200
5.39k
        if (Dep.getKind() == SDep::Output)
201
844
          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
202
8
            continue;
203
5.38k
        return false;
204
5.38k
      }
205
11.2k
    }
206
54.2k
207
54.2k
    bool ARDef =
208
48.8k
        TII->definesAddressRegister(*MII) || 
TII->definesAddressRegister(*MIJ)48.6k
;
209
48.8k
    bool ARUse =
210
48.8k
        TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
211
48.8k
212
48.8k
    return !ARDef || 
!ARUse215
;
213
54.2k
  }
214
215
  // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
216
  // and SUJ.
217
5.56k
  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
218
5.56k
    return false;
219
5.56k
  }
220
221
23.7k
  /// Stores \p Bit in the "last" operand of \p MI.
  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
    const unsigned LastIdx =
        TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
    MachineOperand &LastOp = MI->getOperand(LastIdx);
    LastOp.setImm(Bit);
  }
225
226
  bool isBundlableWithCurrentPMI(MachineInstr &MI,
227
                                 const DenseMap<unsigned, unsigned> &PV,
228
                                 std::vector<R600InstrInfo::BankSwizzle> &BS,
229
46.7k
                                 bool &isTransSlot) {
230
46.7k
    isTransSlot = TII->isTransOnly(MI);
231
46.7k
    assert (!isTransSlot || VLIW5);
232
46.7k
233
46.7k
    // Is the dst reg sequence legal ?
234
46.7k
    if (!isTransSlot && 
!CurrentPacketMIs.empty()45.7k
) {
235
27.0k
      if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
236
11.5k
        if (ConsideredInstUsesAlreadyWrittenVectorElement &&
237
11.5k
            
!TII->isVectorOnly(MI)9.44k
&&
VLIW59.13k
) {
238
8.68k
          isTransSlot = true;
239
8.68k
          LLVM_DEBUG({
240
8.68k
            dbgs() << "Considering as Trans Inst :";
241
8.68k
            MI.dump();
242
8.68k
          });
243
8.68k
        }
244
2.85k
        else
245
2.85k
          return false;
246
43.8k
      }
247
27.0k
    }
248
43.8k
249
43.8k
    // Are the Constants limitations met ?
250
43.8k
    CurrentPacketMIs.push_back(&MI);
251
43.8k
    if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
252
342
      LLVM_DEBUG({
253
342
        dbgs() << "Couldn't pack :\n";
254
342
        MI.dump();
255
342
        dbgs() << "with the following packets :\n";
256
342
        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
257
342
          CurrentPacketMIs[i]->dump();
258
342
          dbgs() << "\n";
259
342
        }
260
342
        dbgs() << "because of Consts read limitations\n";
261
342
      });
262
342
      CurrentPacketMIs.pop_back();
263
342
      return false;
264
342
    }
265
43.5k
266
43.5k
    // Is there a BankSwizzle set that meet Read Port limitations ?
267
43.5k
    if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
268
43.5k
            PV, BS, isTransSlot)) {
269
518
      LLVM_DEBUG({
270
518
        dbgs() << "Couldn't pack :\n";
271
518
        MI.dump();
272
518
        dbgs() << "with the following packets :\n";
273
518
        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
274
518
          CurrentPacketMIs[i]->dump();
275
518
          dbgs() << "\n";
276
518
        }
277
518
        dbgs() << "because of Read port limitations\n";
278
518
      });
279
518
      CurrentPacketMIs.pop_back();
280
518
      return false;
281
518
    }
282
42.9k
283
42.9k
    // We cannot read LDS source registers from the Trans slot.
284
42.9k
    if (isTransSlot && 
TII->readsLDSSrcReg(MI)9.28k
)
285
0
      return false;
286
42.9k
287
42.9k
    CurrentPacketMIs.pop_back();
288
42.9k
    return true;
289
42.9k
  }
290
291
46.7k
  /// Adds \p MI to the current packet when it fits; otherwise closes the
  /// current packet first.
  ///
  /// When \p MI fits, the bank swizzles of all packet members and of \p MI are
  /// written, the "last" bit of the previous packet tail is cleared, and the
  /// sources of \p MI are redirected through the PV mapping. The packet is
  /// closed right after \p MI if it went into the trans slot.
  ///
  /// \returns the iterator produced by VLIWPacketizerList::addToPacket, or
  /// \p MI itself when a trans-only instruction is rejected.
  MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
    // The PV mapping is taken from whatever precedes the first packet member.
    MachineBasicBlock::iterator PacketHead =
        CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
    const DenseMap<unsigned, unsigned> PrevVector =
        getPreviousVector(PacketHead);

    std::vector<R600InstrInfo::BankSwizzle> Swizzles;
    bool InTransSlot;

    if (!isBundlableWithCurrentPMI(MI, PrevVector, Swizzles, InTransSlot)) {
      // MI does not fit: finish the packet in front of it.
      endPacket(MI.getParent(), MI);
      if (TII->isTransOnly(MI))
        return MI;
      return VLIWPacketizerList::addToPacket(MI);
    }

    // Apply the chosen bank swizzle to every instruction already in the
    // packet.
    for (unsigned Idx = 0, Count = CurrentPacketMIs.size(); Idx < Count;
         ++Idx) {
      MachineInstr *Member = CurrentPacketMIs[Idx];
      const unsigned MemberSwzIdx = TII->getOperandIdx(
          Member->getOpcode(), R600::OpName::bank_swizzle);
      Member->getOperand(MemberSwzIdx).setImm(Swizzles[Idx]);
    }

    // The last swizzle belongs to MI itself.
    const unsigned SwzIdx =
        TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
    MI.getOperand(SwzIdx).setImm(Swizzles.back());

    // The previous tail is no longer the last instruction of the packet.
    if (!CurrentPacketMIs.empty())
      setIsLastBit(CurrentPacketMIs.back(), 0);

    substitutePV(MI, PrevVector);

    MachineBasicBlock::iterator Added = VLIWPacketizerList::addToPacket(MI);
    // Once the trans slot is filled the packet is closed after MI.
    if (InTransSlot) {
      MachineBasicBlock::iterator After = std::next(Added);
      endPacket(After->getParent(), After);
    }
    return Added;
  }
323
};
324
325
2.29k
/// Packetizes every basic block of \p Fn.
///
/// \returns false, without touching \p Fn, when the packetizer's resource
/// tracker has empty instruction itineraries; true otherwise.
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
  const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
  const R600InstrInfo *TII = ST.getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();

  // Instantiate the packetizer.
  R600PacketizerList Packetizer(Fn, ST, MLI);

  // DFA state table should not be empty.
  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
  assert(Packetizer.getResourceTracker()->getInstrItins());

  if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
    return false;

  // First sweep: delete KILL and IMPLICIT_DEF pseudo-instructions, as well as
  // CF_ALU instructions whose operand 8 is zero.
  //
  // KILLs confuse the dependence analysis. Consider:
  //   D0 = ...          (Insn 0)
  //   R0 = KILL R0, D0  (Insn 1)
  //   R0 = ...          (Insn 2)
  // Here, Insn 1 will result in the dependence graph not emitting an output
  // dependence between Insn 0 and Insn 2. This can lead to incorrect
  // packetization.
  for (MachineBasicBlock &MBB : Fn) {
    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
      // Advance first so that erasing the current instruction is safe.
      MachineBasicBlock::iterator Cur = Next++;
      const bool Removable =
          Cur->isKill() || Cur->getOpcode() == R600::IMPLICIT_DEF ||
          (Cur->getOpcode() == R600::CF_ALU && !Cur->getOperand(8).getImm());
      if (Removable)
        MBB.erase(Cur);
    }
  }

  // Second sweep: find scheduling regions and packetize each of them.
  for (MachineBasicBlock &MBB : Fn) {
    unsigned RemainingCount = MBB.size();
    MachineBasicBlock::iterator RegionEnd = MBB.end();
    while (RegionEnd != MBB.begin()) {
      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for (; I != MBB.begin(); --I, --RemainingCount) {
        if (TII->isSchedulingBoundary(*std::prev(I), &MBB, Fn))
          break;
      }
      // The result of the scan is overwritten here, so the region always
      // starts at the top of the block.
      I = MBB.begin();

      // Skip empty scheduling regions.
      if (I == RegionEnd) {
        RegionEnd = std::prev(RegionEnd);
        --RemainingCount;
        continue;
      }
      // Skip regions with one instruction.
      if (I == std::prev(RegionEnd)) {
        RegionEnd = std::prev(RegionEnd);
        continue;
      }

      Packetizer.PacketizeMIs(&MBB, &*I, RegionEnd);
      RegionEnd = I;
    }
  }

  return true;
}
404
405
} // end anonymous namespace
406
407
101k
INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
408
101k
                     "R600 Packetizer", false, false)
409
101k
INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
410
                    "R600 Packetizer", false, false)
411
412
char R600Packetizer::ID = 0;
413
414
char &llvm::R600PacketizerID = R600Packetizer::ID;
415
416
280
llvm::FunctionPass *llvm::createR600Packetizer() {
417
280
  return new R600Packetizer();
418
280
}