Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- R600OptimizeVectorRegisters.cpp ------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This pass merges inputs of swizzlable instructions into vectors sharing
11
/// common data and/or have enough undef subreg using swizzle abilities.
12
///
13
/// For instance let's consider the following pseudo code :
14
/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
15
/// ...
16
/// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
17
/// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
18
///
19
/// is turned into :
20
/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
21
/// ...
22
/// %7 = INSERT_SUBREG %4, sub3
23
/// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
24
///
25
/// This allows regalloc to reduce register pressure for vector registers and
26
/// to reduce MOV count.
27
//===----------------------------------------------------------------------===//
28
29
#include "AMDGPU.h"
30
#include "AMDGPUSubtarget.h"
31
#include "R600Defines.h"
32
#include "R600InstrInfo.h"
33
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
34
#include "llvm/ADT/DenseMap.h"
35
#include "llvm/ADT/STLExtras.h"
36
#include "llvm/ADT/StringRef.h"
37
#include "llvm/CodeGen/MachineBasicBlock.h"
38
#include "llvm/CodeGen/MachineDominators.h"
39
#include "llvm/CodeGen/MachineFunction.h"
40
#include "llvm/CodeGen/MachineFunctionPass.h"
41
#include "llvm/CodeGen/MachineInstr.h"
42
#include "llvm/CodeGen/MachineInstrBuilder.h"
43
#include "llvm/CodeGen/MachineLoopInfo.h"
44
#include "llvm/CodeGen/MachineOperand.h"
45
#include "llvm/CodeGen/MachineRegisterInfo.h"
46
#include "llvm/IR/DebugLoc.h"
47
#include "llvm/Pass.h"
48
#include "llvm/Support/Debug.h"
49
#include "llvm/Support/ErrorHandling.h"
50
#include "llvm/Support/raw_ostream.h"
51
#include <cassert>
52
#include <utility>
53
#include <vector>
54
55
using namespace llvm;
56
57
#define DEBUG_TYPE "vec-merger"
58
59
9.05k
/// Returns true when \p Reg is a virtual register whose unique definition is
/// an implicit-def instruction. Physical registers, and virtual registers
/// without a unique definition, yield false. \p MRI must be in SSA form
/// (asserted).
static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
  assert(MRI.isSSA());
  if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
    if (const MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
      return Def->isImplicitDef();
  }
  return false;
}
66
67
namespace {
68
69
class RegSeqInfo {
70
public:
71
  MachineInstr *Instr;
72
  DenseMap<unsigned, unsigned> RegToChan;
73
  std::vector<unsigned> UndefReg;
74
75
2.45k
  RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
76
2.45k
    assert(MI->getOpcode() == R600::REG_SEQUENCE);
77
11.5k
    for (unsigned i = 1, e = Instr->getNumOperands(); i < e; 
i+=29.05k
) {
78
9.05k
      MachineOperand &MO = Instr->getOperand(i);
79
9.05k
      unsigned Chan = Instr->getOperand(i + 1).getImm();
80
9.05k
      if (isImplicitlyDef(MRI, MO.getReg()))
81
101
        UndefReg.push_back(Chan);
82
8.95k
      else
83
8.95k
        RegToChan[MO.getReg()] = Chan;
84
9.05k
    }
85
2.45k
  }
86
87
610
  RegSeqInfo() = default;
88
89
4
  bool operator==(const RegSeqInfo &RSI) const {
90
4
    return RSI.Instr == Instr;
91
4
  }
92
};
93
94
class R600VectorRegMerger : public MachineFunctionPass {
95
private:
96
  using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
97
98
  MachineRegisterInfo *MRI;
99
  const R600InstrInfo *TII = nullptr;
100
  DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
101
  InstructionSetMap PreviousRegSeqByReg;
102
  InstructionSetMap PreviousRegSeqByUndefCount;
103
104
  bool canSwizzle(const MachineInstr &MI) const;
105
  bool areAllUsesSwizzeable(unsigned Reg) const;
106
  void SwizzleInput(MachineInstr &,
107
      const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
108
  bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
109
      std::vector<std::pair<unsigned, unsigned>> &Remap) const;
110
  bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
111
      std::vector<std::pair<unsigned, unsigned>> &RemapChan);
112
  bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
113
      std::vector<std::pair<unsigned, unsigned>> &RemapChan);
114
  MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
115
      const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
116
  void RemoveMI(MachineInstr *);
117
  void trackRSI(const RegSeqInfo &RSI);
118
119
public:
120
  static char ID;
121
122
280
  R600VectorRegMerger() : MachineFunctionPass(ID) {}
123
124
280
  void getAnalysisUsage(AnalysisUsage &AU) const override {
125
280
    AU.setPreservesCFG();
126
280
    AU.addRequired<MachineDominatorTree>();
127
280
    AU.addPreserved<MachineDominatorTree>();
128
280
    AU.addRequired<MachineLoopInfo>();
129
280
    AU.addPreserved<MachineLoopInfo>();
130
280
    MachineFunctionPass::getAnalysisUsage(AU);
131
280
  }
132
133
2.57k
  StringRef getPassName() const override {
134
2.57k
    return "R600 Vector Registers Merge Pass";
135
2.57k
  }
136
137
  bool runOnMachineFunction(MachineFunction &Fn) override;
138
};
139
140
} // end anonymous namespace
141
142
101k
// Pass registration: R600VectorRegMerger is registered under the DEBUG_TYPE
// argument string ("vec-merger") with the description "R600 Vector Reg
// Merger". The two trailing `false` arguments are presumably the cfg-only and
// is-analysis flags of LLVM's INITIALIZE_PASS macros -- confirm against
// PassSupport.h.
INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
143
101k
                     "R600 Vector Reg Merger", false, false)
144
101k
INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
145
                    "R600 Vector Reg Merger", false, false)
146
147
// Storage for the pass identifier handed to the MachineFunctionPass
// constructor.
char R600VectorRegMerger::ID = 0;
148
149
// Reference to the pass identifier, defined in the llvm namespace so the
// pass can be referred to without access to the class itself.
char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
150
151
bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
152
2.45k
    const {
153
2.45k
  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
154
252
    return true;
155
2.20k
  switch (MI.getOpcode()) {
156
2.20k
  case R600::R600_ExportSwz:
157
53
  case R600::EG_ExportSwz:
158
53
    return true;
159
2.15k
  default:
160
2.15k
    return false;
161
2.20k
  }
162
2.20k
}
163
164
bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
165
    RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
166
5
    const {
167
5
  unsigned CurrentUndexIdx = 0;
168
5
  for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
169
14
      E = ToMerge->RegToChan.end(); It != E; 
++It9
) {
170
9
    DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
171
9
        Untouched->RegToChan.find((*It).first);
172
9
    if (PosInUntouched != Untouched->RegToChan.end()) {
173
5
      Remap.push_back(std::pair<unsigned, unsigned>
174
5
          ((*It).second, (*PosInUntouched).second));
175
5
      continue;
176
5
    }
177
4
    if (CurrentUndexIdx >= Untouched->UndefReg.size())
178
0
      return false;
179
4
    Remap.push_back(std::pair<unsigned, unsigned>
180
4
        ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
181
4
  }
182
5
183
5
  return true;
184
5
}
185
186
static
187
unsigned getReassignedChan(
188
    const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
189
9
    unsigned Chan) {
190
13
  for (unsigned j = 0, je = RemapChan.size(); j < je; 
j++4
) {
191
13
    if (RemapChan[j].first == Chan)
192
9
      return RemapChan[j].second;
193
13
  }
194
9
  
llvm_unreachable0
("Chan wasn't reassigned");
195
9
}
196
197
/// Replaces the instruction described by \p RSI with a chain of INSERT_SUBREG
/// instructions on top of \p BaseRSI's result, followed by a COPY into the
/// original result register.
///
/// Every register in \p RSI is inserted at the channel \p RemapChan assigns
/// to it, all users of the result register have their swizzle operands
/// rewritten through SwizzleInput, and the old instruction is erased. On
/// return \p RSI describes the new COPY, with the base vector's channel map
/// and undefined-channel list updated for the inserted registers.
///
/// \returns the newly created COPY instruction.
MachineInstr *R600VectorRegMerger::RebuildVector(
    RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
  MachineInstr *OldSeq = RSI->Instr;
  const unsigned Reg = OldSeq->getOperand(0).getReg();
  MachineBasicBlock::iterator Pos = OldSeq;
  MachineBasicBlock &MBB = *Pos->getParent();
  DebugLoc DL = Pos->getDebugLoc();

  // Start from the base vector's state and layer RSI's registers on top.
  unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
  DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
  std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
  for (const auto &RegAndChan : RSI->RegToChan) {
    const unsigned DstReg =
        MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
    const unsigned SubReg = RegAndChan.first;
    const unsigned Chan = getReassignedChan(RemapChan, RegAndChan.second);

    MachineInstr *Tmp =
        BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG), DstReg)
            .addReg(SrcVec)
            .addReg(SubReg)
            .addImm(Chan);
    UpdatedRegToChan[SubReg] = Chan;
    // The channel is now occupied, so it no longer counts as undefined.
    auto ChanPos = llvm::find(UpdatedUndef, Chan);
    if (ChanPos != UpdatedUndef.end())
      UpdatedUndef.erase(ChanPos);
    assert(!is_contained(UpdatedUndef, Chan) &&
           "UpdatedUndef shouldn't contain Chan more than once!");
    LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
    (void)Tmp;
    // Each insert feeds the next one.
    SrcVec = DstReg;
  }

  MachineInstr *NewMI =
      BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
  LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););

  LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
  for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
    LLVM_DEBUG(dbgs() << "    "; UseMI.dump(); dbgs() << "    ->");
    SwizzleInput(UseMI, RemapChan);
    LLVM_DEBUG(UseMI.dump());
  }
  OldSeq->eraseFromParent();

  // Update RSI
  RSI->Instr = NewMI;
  RSI->RegToChan = std::move(UpdatedRegToChan);
  RSI->UndefReg = std::move(UpdatedUndef);

  return NewMI;
}
250
251
281
/// Stops tracking \p MI: removes it from every per-register and
/// per-undef-count candidate list.
///
/// Only entries equal to \p MI are removed. The previous form,
/// `erase(find(MIs, MI), MIs.end())`, erased the range starting at \p MI and
/// therefore also dropped every instruction tracked after it in the same
/// list. PreviousRegSeq is left untouched.
void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
  auto IsMI = [MI](const MachineInstr *Tracked) { return Tracked == MI; };
  for (auto &Entry : PreviousRegSeqByReg)
    llvm::erase_if(Entry.second, IsMI);
  for (auto &Entry : PreviousRegSeqByUndefCount)
    llvm::erase_if(Entry.second, IsMI);
}
263
264
void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
265
5
    const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
266
5
  unsigned Offset;
267
5
  if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
268
2
    Offset = 2;
269
3
  else
270
3
    Offset = 3;
271
25
  for (unsigned i = 0; i < 4; 
i++20
) {
272
20
    unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
273
35
    for (unsigned j = 0, e = RemapChan.size(); j < e; 
j++15
) {
274
30
      if (RemapChan[j].first == Swizzle) {
275
15
        MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
276
15
        break;
277
15
      }
278
30
    }
279
20
  }
280
5
}
281
282
2.45k
bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
283
2.45k
  for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
284
2.76k
      E = MRI->use_instr_end(); It != E; 
++It305
) {
285
2.45k
    if (!canSwizzle(*It))
286
2.15k
      return false;
287
2.45k
  }
288
2.45k
  
return true305
;
289
2.45k
}
290
291
bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
292
    RegSeqInfo &CompatibleRSI,
293
305
    std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
294
305
  for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
295
3.02k
      MOE = RSI.Instr->operands_end(); MOp != MOE; 
++MOp2.72k
) {
296
2.72k
    if (!MOp->isReg())
297
1.20k
      continue;
298
1.51k
    if (PreviousRegSeqByReg[MOp->getReg()].empty())
299
1.51k
      continue;
300
4
    for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
301
4
      CompatibleRSI = PreviousRegSeq[MI];
302
4
      if (RSI == CompatibleRSI)
303
0
        continue;
304
4
      if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
305
4
        return true;
306
4
    }
307
4
  }
308
305
  
return false301
;
309
305
}
310
311
bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
312
    RegSeqInfo &CompatibleRSI,
313
301
    std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
314
301
  unsigned NeededUndefs = 4 - RSI.UndefReg.size();
315
301
  if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
316
300
    return false;
317
1
  std::vector<MachineInstr *> &MIs =
318
1
      PreviousRegSeqByUndefCount[NeededUndefs];
319
1
  CompatibleRSI = PreviousRegSeq[MIs.back()];
320
1
  tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
321
1
  return true;
322
1
}
323
324
305
void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
325
305
  for (DenseMap<unsigned, unsigned>::const_iterator
326
1.43k
  It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; 
++It1.13k
) {
327
1.13k
    PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
328
1.13k
  }
329
305
  PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
330
305
  PreviousRegSeq[RSI.Instr] = RSI;
331
305
}
332
333
2.29k
/// Runs the merger over \p Fn, one basic block at a time.
///
/// Tracking state is reset for each block. Within a block:
///  - an instruction flagged TEX_INST stops tracking of the instruction(s)
///    defining its operand 1;
///  - a REG_SEQUENCE whose users are all swizzlable is merged into an earlier
///    tracked vector when possible, first through a shared source register,
///    then through free (undefined) channels;
///  - every such REG_SEQUENCE, merged or not, is tracked afterwards.
///
/// \returns true if at least one REG_SEQUENCE was rewritten.
bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
  TII = ST.getInstrInfo();
  MRI = &Fn.getRegInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn) {
    PreviousRegSeq.clear();
    PreviousRegSeqByReg.clear();
    PreviousRegSeqByUndefCount.clear();

    for (MachineBasicBlock::iterator MII = MBB.begin(), MIIE = MBB.end();
         MII != MIIE; ++MII) {
      MachineInstr &MI = *MII;
      if (MI.getOpcode() != R600::REG_SEQUENCE) {
        if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
          unsigned Reg = MI.getOperand(1).getReg();
          for (MachineInstr &DefMI : MRI->def_instructions(Reg))
            RemoveMI(&DefMI);
        }
        continue;
      }

      RegSeqInfo RSI(*MRI, &MI);

      // All uses of MI are swizzeable ?
      unsigned Reg = MI.getOperand(0).getReg();
      if (!areAllUsesSwizzeable(Reg))
        continue;

      LLVM_DEBUG({
        dbgs() << "Trying to optimize ";
        MI.dump();
      });

      RegSeqInfo CandidateRSI;
      std::vector<std::pair<unsigned, unsigned>> RemapChan;
      LLVM_DEBUG(dbgs() << "Using common slots...\n";);
      bool Merged = tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan);
      if (!Merged) {
        LLVM_DEBUG(dbgs() << "Using free slots...\n";);
        RemapChan.clear();
        Merged = tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan);
      }

      if (Merged) {
        // Remove CandidateRSI mapping
        RemoveMI(CandidateRSI.Instr);
        // MI is erased by RebuildVector; resume from the COPY replacing it.
        MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
        Changed = true;
      }
      // Merged or not, RSI becomes a candidate for later instructions.
      trackRSI(RSI);
    }
  }
  // Report rewrites to the pass manager instead of always claiming the
  // function was left untouched.
  return Changed;
}
399
400
280
/// Allocates a new R600VectorRegMerger pass with `new` and returns it as a
/// FunctionPass pointer.
llvm::FunctionPass *llvm::createR600VectorRegMerger() {
  FunctionPass *Merger = new R600VectorRegMerger();
  return Merger;
}