Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
/// This file implements the targeting of the RegisterBankInfo class for
10
/// AMDGPU.
11
/// \todo This should be generated by TableGen.
12
//===----------------------------------------------------------------------===//
13
14
#include "AMDGPURegisterBankInfo.h"
15
#include "AMDGPUInstrInfo.h"
16
#include "AMDGPUSubtarget.h"
17
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18
#include "SIMachineFunctionInfo.h"
19
#include "SIRegisterInfo.h"
20
#include "llvm/ADT/SmallSet.h"
21
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
22
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
24
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
25
#include "llvm/CodeGen/TargetRegisterInfo.h"
26
#include "llvm/CodeGen/TargetSubtargetInfo.h"
27
#include "llvm/IR/Constants.h"
28
29
#define GET_TARGET_REGBANK_IMPL
30
#include "AMDGPUGenRegisterBank.inc"
31
32
// This file will be TableGen'ed at some point.
33
#include "AMDGPUGenRegisterBankInfo.def"
34
35
using namespace llvm;
36
37
namespace {
38
39
// Observer to apply a register bank to new registers created by LegalizerHelper.
40
class ApplyRegBankMapping final : public GISelChangeObserver {
41
private:
42
  MachineRegisterInfo &MRI;
43
  const RegisterBank *NewBank;
44
  SmallVector<MachineInstr *, 4> NewInsts;
45
46
public:
47
  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
48
48
    : MRI(MRI_), NewBank(RB) {}
49
50
48
  ~ApplyRegBankMapping() {
51
48
    for (MachineInstr *MI : NewInsts)
52
528
      applyBank(*MI);
53
48
  }
54
55
  /// Set any registers that don't have a set register class or bank to SALU.
56
528
  void applyBank(MachineInstr &MI) {
57
1.39k
    for (MachineOperand &Op : MI.operands()) {
58
1.39k
      if (!Op.isReg())
59
208
        continue;
60
1.18k
61
1.18k
      Register Reg = Op.getReg();
62
1.18k
      if (MRI.getRegClassOrRegBank(Reg))
63
976
        continue;
64
208
65
208
      const RegisterBank *RB = NewBank;
66
208
      // FIXME: This might not be enough to detect when SCC should be used.
67
208
      if (MRI.getType(Reg) == LLT::scalar(1))
68
32
        RB = (NewBank == &AMDGPU::SGPRRegBank ?
69
32
              &AMDGPU::SCCRegBank : 
&AMDGPU::VCCRegBank0
);
70
208
71
208
      MRI.setRegBank(Reg, *RB);
72
208
    }
73
528
  }
74
75
0
  void erasingInstr(MachineInstr &MI) override {}
76
77
528
  void createdInstr(MachineInstr &MI) override {
78
528
    // At this point, the instruction was just inserted and has no operands.
79
528
    NewInsts.push_back(&MI);
80
528
  }
81
82
16
  void changingInstr(MachineInstr &MI) override {}
83
16
  void changedInstr(MachineInstr &MI) override {}
84
};
85
86
}
87
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
88
    : AMDGPUGenRegisterBankInfo(),
89
3.64k
      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
90
3.64k
91
3.64k
  // HACK: Until this is fully tablegen'd.
92
3.64k
  static bool AlreadyInit = false;
93
3.64k
  if (AlreadyInit)
94
114
    return;
95
3.52k
96
3.52k
  AlreadyInit = true;
97
3.52k
98
3.52k
  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
99
3.52k
  (void)RBSGPR;
100
3.52k
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);
101
3.52k
102
3.52k
  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
103
3.52k
  (void)RBVGPR;
104
3.52k
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
105
3.52k
106
3.52k
}
107
108
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
109
                                          const RegisterBank &Src,
110
4.82k
                                          unsigned Size) const {
111
4.82k
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
112
4.82k
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
113
4.82k
      
Src.getID() == AMDGPU::VGPRRegBankID2.24k
) {
114
289
    return std::numeric_limits<unsigned>::max();
115
289
  }
116
4.53k
117
4.53k
  // Bool values are tricky, because the meaning is based on context. The SCC
118
4.53k
  // and VCC banks are for the natural scalar and vector conditions produced by
119
4.53k
  // a compare.
120
4.53k
  //
121
4.53k
  // Legalization doesn't know about the necessary context, so an s1 use may
122
4.53k
  // have been a truncate from an arbitrary value, in which case a copy (lowered
123
4.53k
  // as a compare with 0) needs to be inserted.
124
4.53k
  if (Size == 1 &&
125
4.53k
      
(344
Dst.getID() == AMDGPU::SCCRegBankID344
||
126
344
       
Dst.getID() == AMDGPU::SGPRRegBankID245
) &&
127
4.53k
      
(156
Src.getID() == AMDGPU::SGPRRegBankID156
||
128
156
       
Src.getID() == AMDGPU::VGPRRegBankID112
||
129
156
       
Src.getID() == AMDGPU::VCCRegBankID84
))
130
117
    return std::numeric_limits<unsigned>::max();
131
4.41k
132
4.41k
  if (Dst.getID() == AMDGPU::SCCRegBankID &&
133
4.41k
      
Src.getID() == AMDGPU::VCCRegBankID0
)
134
0
    return std::numeric_limits<unsigned>::max();
135
4.41k
136
4.41k
  return RegisterBankInfo::copyCost(Dst, Src, Size);
137
4.41k
}
138
139
unsigned AMDGPURegisterBankInfo::getBreakDownCost(
140
  const ValueMapping &ValMapping,
141
255
  const RegisterBank *CurBank) const {
142
255
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
143
255
  // VGPR.
144
255
  // FIXME: Is there a better way to do this?
145
255
  if (ValMapping.NumBreakDowns >= 2 || 
ValMapping.BreakDown[0].Length >= 640
)
146
255
    return 10; // This is expensive.
147
0
148
0
  assert(ValMapping.NumBreakDowns == 2 &&
149
0
         ValMapping.BreakDown[0].Length == 32 &&
150
0
         ValMapping.BreakDown[0].StartIdx == 0 &&
151
0
         ValMapping.BreakDown[1].Length == 32 &&
152
0
         ValMapping.BreakDown[1].StartIdx == 32 &&
153
0
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
154
0
155
0
  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
156
0
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
157
0
  // want.
158
0
159
0
  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
160
0
  // alignment restrictions, but this probably isn't important.
161
0
  return 1;
162
0
}
163
164
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
165
8.49k
    const TargetRegisterClass &RC) const {
166
8.49k
167
8.49k
  if (TRI->isSGPRClass(&RC))
168
4.64k
    return getRegBank(AMDGPU::SGPRRegBankID);
169
3.84k
170
3.84k
  return getRegBank(AMDGPU::VGPRRegBankID);
171
3.84k
}
172
173
template <unsigned NumOps>
174
RegisterBankInfo::InstructionMappings
175
AMDGPURegisterBankInfo::addMappingFromTable(
176
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
177
    const std::array<unsigned, NumOps> RegSrcOpIdx,
178
57
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
179
57
180
57
  InstructionMappings AltMappings;
181
57
182
57
  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
183
57
184
57
  unsigned Sizes[NumOps];
185
225
  for (unsigned I = 0; I < NumOps; 
++I168
) {
186
168
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
187
168
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
188
168
  }
189
57
190
110
  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; 
++I53
) {
191
53
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
192
53
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
193
53
  }
194
57
195
57
  unsigned MappingID = 0;
196
196
  for (const auto &Entry : Table) {
197
788
    for (unsigned I = 0; I < NumOps; 
++I592
) {
198
592
      int OpIdx = RegSrcOpIdx[I];
199
592
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
200
592
    }
201
196
202
196
    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
203
196
                                                 getOperandsMapping(Operands),
204
196
                                                 Operands.size()));
205
196
  }
206
57
207
57
  return AltMappings;
208
57
}
llvm::SmallVector<llvm::RegisterBankInfo::InstructionMapping const*, 4u> llvm::AMDGPURegisterBankInfo::addMappingFromTable<3u>(llvm::MachineInstr const&, llvm::MachineRegisterInfo const&, std::__1::array<unsigned int, 3u>, llvm::ArrayRef<llvm::AMDGPURegisterBankInfo::OpRegBankEntry<3u> >) const
Line
Count
Source
178
48
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
179
48
180
48
  InstructionMappings AltMappings;
181
48
182
48
  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
183
48
184
48
  unsigned Sizes[NumOps];
185
192
  for (unsigned I = 0; I < NumOps; 
++I144
) {
186
144
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
187
144
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
188
144
  }
189
48
190
96
  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; 
++I48
) {
191
48
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
192
48
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
193
48
  }
194
48
195
48
  unsigned MappingID = 0;
196
168
  for (const auto &Entry : Table) {
197
672
    for (unsigned I = 0; I < NumOps; 
++I504
) {
198
504
      int OpIdx = RegSrcOpIdx[I];
199
504
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
200
504
    }
201
168
202
168
    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
203
168
                                                 getOperandsMapping(Operands),
204
168
                                                 Operands.size()));
205
168
  }
206
48
207
48
  return AltMappings;
208
48
}
llvm::SmallVector<llvm::RegisterBankInfo::InstructionMapping const*, 4u> llvm::AMDGPURegisterBankInfo::addMappingFromTable<4u>(llvm::MachineInstr const&, llvm::MachineRegisterInfo const&, std::__1::array<unsigned int, 4u>, llvm::ArrayRef<llvm::AMDGPURegisterBankInfo::OpRegBankEntry<4u> >) const
Line
Count
Source
178
5
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
179
5
180
5
  InstructionMappings AltMappings;
181
5
182
5
  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
183
5
184
5
  unsigned Sizes[NumOps];
185
25
  for (unsigned I = 0; I < NumOps; 
++I20
) {
186
20
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
187
20
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
188
20
  }
189
5
190
10
  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; 
++I5
) {
191
5
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
192
5
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
193
5
  }
194
5
195
5
  unsigned MappingID = 0;
196
20
  for (const auto &Entry : Table) {
197
100
    for (unsigned I = 0; I < NumOps; 
++I80
) {
198
80
      int OpIdx = RegSrcOpIdx[I];
199
80
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
200
80
    }
201
20
202
20
    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
203
20
                                                 getOperandsMapping(Operands),
204
20
                                                 Operands.size()));
205
20
  }
206
5
207
5
  return AltMappings;
208
5
}
Unexecuted instantiation: llvm::SmallVector<llvm::RegisterBankInfo::InstructionMapping const*, 4u> llvm::AMDGPURegisterBankInfo::addMappingFromTable<2u>(llvm::MachineInstr const&, llvm::MachineRegisterInfo const&, std::__1::array<unsigned int, 2u>, llvm::ArrayRef<llvm::AMDGPURegisterBankInfo::OpRegBankEntry<2u> >) const
llvm::SmallVector<llvm::RegisterBankInfo::InstructionMapping const*, 4u> llvm::AMDGPURegisterBankInfo::addMappingFromTable<1u>(llvm::MachineInstr const&, llvm::MachineRegisterInfo const&, std::__1::array<unsigned int, 1u>, llvm::ArrayRef<llvm::AMDGPURegisterBankInfo::OpRegBankEntry<1u> >) const
Line
Count
Source
178
4
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
179
4
180
4
  InstructionMappings AltMappings;
181
4
182
4
  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
183
4
184
4
  unsigned Sizes[NumOps];
185
8
  for (unsigned I = 0; I < NumOps; 
++I4
) {
186
4
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
187
4
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
188
4
  }
189
4
190
4
  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; 
++I0
) {
191
0
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
192
0
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
193
0
  }
194
4
195
4
  unsigned MappingID = 0;
196
8
  for (const auto &Entry : Table) {
197
16
    for (unsigned I = 0; I < NumOps; 
++I8
) {
198
8
      int OpIdx = RegSrcOpIdx[I];
199
8
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
200
8
    }
201
8
202
8
    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
203
8
                                                 getOperandsMapping(Operands),
204
8
                                                 Operands.size()));
205
8
  }
206
4
207
4
  return AltMappings;
208
4
}
209
210
RegisterBankInfo::InstructionMappings
211
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
212
51
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
213
51
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
214
51
  case Intrinsic::amdgcn_readlane: {
215
4
    static const OpRegBankEntry<3> Table[2] = {
216
4
      // Perfectly legal.
217
4
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
218
4
219
4
      // Need a readfirstlane for the index.
220
4
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
221
4
    };
222
4
223
4
    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
224
4
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
225
51
  }
226
51
  case Intrinsic::amdgcn_writelane: {
227
5
    static const OpRegBankEntry<4> Table[4] = {
228
5
      // Perfectly legal.
229
5
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
230
5
231
5
      // Need readfirstlane of first op
232
5
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
233
5
234
5
      // Need readfirstlane of second op
235
5
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
236
5
237
5
      // Need readfirstlane of both ops
238
5
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
239
5
    };
240
5
241
5
    // rsrc, voffset, offset
242
5
    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
243
5
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
244
51
  }
245
51
  default:
246
42
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
247
51
  }
248
51
}
249
250
RegisterBankInfo::InstructionMappings
251
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
252
48
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
253
48
254
48
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
255
48
  case Intrinsic::amdgcn_buffer_load: {
256
8
    static const OpRegBankEntry<3> Table[4] = {
257
8
      // Perfectly legal.
258
8
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
259
8
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
260
8
261
8
      // Waterfall loop needed for rsrc. In the worst case this will execute
262
8
      // approximately an extra 10 * wavesize + 2 instructions.
263
8
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
264
8
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
265
8
    };
266
8
267
8
    // rsrc, voffset, offset
268
8
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
269
8
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
270
48
  }
271
48
  case Intrinsic::amdgcn_s_buffer_load: {
272
0
    static const OpRegBankEntry<2> Table[4] = {
273
0
      // Perfectly legal.
274
0
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
275
0
276
0
      // Only need 1 register in loop
277
0
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
278
0
279
0
      // Have to waterfall the resource.
280
0
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
281
0
282
0
      // Have to waterfall the resource, and the offset.
283
0
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
284
0
    };
285
0
286
0
    // rsrc, offset
287
0
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
288
0
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
289
48
  }
290
48
  case Intrinsic::amdgcn_ds_ordered_add:
291
8
  case Intrinsic::amdgcn_ds_ordered_swap: {
292
8
    // VGPR = M0, VGPR
293
8
    static const OpRegBankEntry<3> Table[2] = {
294
8
      // Perfectly legal.
295
8
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID  }, 1 },
296
8
297
8
      // Need a readfirstlane for m0
298
8
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
299
8
    };
300
8
301
8
    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
302
8
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
303
8
  }
304
8
  case Intrinsic::amdgcn_s_sendmsg:
305
4
  case Intrinsic::amdgcn_s_sendmsghalt: {
306
4
    static const OpRegBankEntry<1> Table[2] = {
307
4
      // Perfectly legal.
308
4
      { { AMDGPU::SGPRRegBankID }, 1 },
309
4
310
4
      // Need readlane
311
4
      { { AMDGPU::VGPRRegBankID }, 3 }
312
4
    };
313
4
314
4
    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
315
4
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
316
4
  }
317
28
  default:
318
28
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
319
48
  }
320
48
}
321
322
188
static bool isInstrUniform(const MachineInstr &MI) {
323
188
  if (!MI.hasOneMemOperand())
324
0
    return false;
325
188
326
188
  const MachineMemOperand *MMO = *MI.memoperands_begin();
327
188
  return AMDGPUInstrInfo::isUniformMMO(MMO);
328
188
}
329
330
RegisterBankInfo::InstructionMappings
331
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
332
2.67k
    const MachineInstr &MI) const {
333
2.67k
334
2.67k
  const MachineFunction &MF = *MI.getParent()->getParent();
335
2.67k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
336
2.67k
337
2.67k
338
2.67k
  InstructionMappings AltMappings;
339
2.67k
  switch (MI.getOpcode()) {
340
2.67k
  case TargetOpcode::G_AND:
341
117
  case TargetOpcode::G_OR:
342
117
  case TargetOpcode::G_XOR: {
343
117
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
344
117
345
117
    if (Size == 1) {
346
30
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
347
30
      const InstructionMapping &SCCMapping = getInstructionMapping(
348
30
        1, 1, getOperandsMapping(
349
30
          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
350
30
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
351
30
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
352
30
        3); // Num Operands
353
30
      AltMappings.push_back(&SCCMapping);
354
30
355
30
      const InstructionMapping &SGPRMapping = getInstructionMapping(
356
30
        1, 1, getOperandsMapping(
357
30
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
358
30
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
359
30
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
360
30
        3); // Num Operands
361
30
      AltMappings.push_back(&SGPRMapping);
362
30
363
30
      const InstructionMapping &VCCMapping0 = getInstructionMapping(
364
30
        2, 10, getOperandsMapping(
365
30
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
366
30
              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
367
30
              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
368
30
        3); // Num Operands
369
30
      AltMappings.push_back(&VCCMapping0);
370
30
      return AltMappings;
371
30
    }
372
87
373
87
    if (Size != 64)
374
24
      break;
375
63
376
63
    const InstructionMapping &SSMapping = getInstructionMapping(
377
63
      1, 1, getOperandsMapping(
378
63
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
379
63
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
380
63
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
381
63
      3); // Num Operands
382
63
    AltMappings.push_back(&SSMapping);
383
63
384
63
    const InstructionMapping &VVMapping = getInstructionMapping(
385
63
      2, 2, getOperandsMapping(
386
63
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
387
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
388
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
389
63
      3); // Num Operands
390
63
    AltMappings.push_back(&VVMapping);
391
63
392
63
    const InstructionMapping &SVMapping = getInstructionMapping(
393
63
      3, 3, getOperandsMapping(
394
63
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
395
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
396
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
397
63
      3); // Num Operands
398
63
    AltMappings.push_back(&SVMapping);
399
63
400
63
    // SGPR in LHS is slightly preferrable, so make it VS more expensive than
401
63
    // SV.
402
63
    const InstructionMapping &VSMapping = getInstructionMapping(
403
63
      3, 4, getOperandsMapping(
404
63
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
405
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
406
63
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
407
63
      3); // Num Operands
408
63
    AltMappings.push_back(&VSMapping);
409
63
    break;
410
63
  }
411
63
  case TargetOpcode::G_LOAD: {
412
16
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
413
16
    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
414
16
    // FIXME: Should we be hard coding the size for these mappings?
415
16
    if (isInstrUniform(MI)) {
416
8
      const InstructionMapping &SSMapping = getInstructionMapping(
417
8
          1, 1, getOperandsMapping(
418
8
                    {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
419
8
                     AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
420
8
          2); // Num Operands
421
8
      AltMappings.push_back(&SSMapping);
422
8
    }
423
16
424
16
    const InstructionMapping &VVMapping = getInstructionMapping(
425
16
        2, 1, getOperandsMapping(
426
16
                  {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
427
16
                   AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
428
16
        2); // Num Operands
429
16
    AltMappings.push_back(&VVMapping);
430
16
431
16
    // It may be possible to have a vgpr = load sgpr mapping here, because
432
16
    // the mubuf instructions support this kind of load, but probably for only
433
16
    // gfx7 and older.  However, the addressing mode matching in the instruction
434
16
    // selector should be able to do a better job of detecting and selecting
435
16
    // these kinds of loads from the vgpr = load vgpr mapping.
436
16
437
16
    return AltMappings;
438
63
439
63
  }
440
225
  case TargetOpcode::G_ICMP: {
441
225
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
442
225
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
443
225
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
444
225
                          nullptr, // Predicate operand.
445
225
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
446
225
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
447
225
      4); // Num Operands
448
225
    AltMappings.push_back(&SSMapping);
449
225
450
225
    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
451
225
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
452
225
                          nullptr, // Predicate operand.
453
225
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
454
225
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
455
225
      4); // Num Operands
456
225
    AltMappings.push_back(&SVMapping);
457
225
458
225
    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
459
225
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
460
225
                          nullptr, // Predicate operand.
461
225
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
462
225
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
463
225
      4); // Num Operands
464
225
    AltMappings.push_back(&VSMapping);
465
225
466
225
    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
467
225
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
468
225
                          nullptr, // Predicate operand.
469
225
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
470
225
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
471
225
      4); // Num Operands
472
225
    AltMappings.push_back(&VVMapping);
473
225
474
225
    return AltMappings;
475
63
  }
476
80
  case TargetOpcode::G_SELECT: {
477
80
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
478
80
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
479
80
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
480
80
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
481
80
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
482
80
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
483
80
      4); // Num Operands
484
80
    AltMappings.push_back(&SSMapping);
485
80
486
80
    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
487
80
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
488
80
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
489
80
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
490
80
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
491
80
      4); // Num Operands
492
80
    AltMappings.push_back(&VVMapping);
493
80
494
80
    return AltMappings;
495
63
  }
496
63
  case TargetOpcode::G_SMIN:
497
28
  case TargetOpcode::G_SMAX:
498
28
  case TargetOpcode::G_UMIN:
499
28
  case TargetOpcode::G_UMAX: {
500
28
    static const OpRegBankEntry<3> Table[4] = {
501
28
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
502
28
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
503
28
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
504
28
505
28
      // Scalar requires cmp+select, and extends if 16-bit.
506
28
      // FIXME: Should there be separate costs for 32 and 16-bit
507
28
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
508
28
    };
509
28
510
28
    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
511
28
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
512
28
  }
513
28
  case TargetOpcode::G_UADDE:
514
20
  case TargetOpcode::G_USUBE:
515
20
  case TargetOpcode::G_SADDE:
516
20
  case TargetOpcode::G_SSUBE: {
517
20
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
518
20
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
519
20
      getOperandsMapping(
520
20
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
521
20
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
522
20
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
523
20
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
524
20
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
525
20
      5); // Num Operands
526
20
    AltMappings.push_back(&SSMapping);
527
20
528
20
    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
529
20
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
530
20
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
531
20
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
532
20
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
533
20
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
534
20
      5); // Num Operands
535
20
    AltMappings.push_back(&VVMapping);
536
20
    return AltMappings;
537
20
  }
538
55
  case AMDGPU::G_BRCOND: {
539
55
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
540
55
541
55
    const InstructionMapping &SMapping = getInstructionMapping(
542
55
      1, 1, getOperandsMapping(
543
55
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
544
55
      2); // Num Operands
545
55
    AltMappings.push_back(&SMapping);
546
55
547
55
    const InstructionMapping &VMapping = getInstructionMapping(
548
55
      1, 1, getOperandsMapping(
549
55
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
550
55
      2); // Num Operands
551
55
    AltMappings.push_back(&VMapping);
552
55
    return AltMappings;
553
20
  }
554
51
  case AMDGPU::G_INTRINSIC:
555
51
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
556
48
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
557
48
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
558
2.03k
  default:
559
2.03k
    break;
560
2.12k
  }
561
2.12k
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
562
2.12k
}
563
564
void AMDGPURegisterBankInfo::split64BitValueForMapping(
565
  MachineIRBuilder &B,
566
  SmallVector<Register, 2> &Regs,
567
  LLT HalfTy,
568
204
  Register Reg) const {
569
204
  assert(HalfTy.getSizeInBits() == 32);
570
204
  MachineRegisterInfo *MRI = B.getMRI();
571
204
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
572
204
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
573
204
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
574
204
  MRI->setRegBank(LoLHS, *Bank);
575
204
  MRI->setRegBank(HiLHS, *Bank);
576
204
577
204
  Regs.push_back(LoLHS);
578
204
  Regs.push_back(HiLHS);
579
204
580
204
  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
581
204
    .addDef(LoLHS)
582
204
    .addDef(HiLHS)
583
204
    .addUse(Reg);
584
204
}
585
586
/// Replace the current type each register in \p Regs has with \p NewTy
587
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
588
270
                          LLT NewTy) {
589
540
  for (Register Reg : Regs) {
590
540
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
591
540
    MRI.setType(Reg, NewTy);
592
540
  }
593
270
}
594
595
190
/// Return a type covering exactly half of \p Ty: for vectors, half the element
/// count (collapsing to a scalar when one element remains); for scalars, half
/// the bit width. \p Ty must divide evenly.
static LLT getHalfSizedType(LLT Ty) {
  if (!Ty.isVector()) {
    assert(Ty.getSizeInBits() % 2 == 0);
    return LLT::scalar(Ty.getSizeInBits() / 2);
  }

  assert(Ty.getNumElements() % 2 == 0);
  return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
}
604
605
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that rest of the instructions are
/// moved to a new block.
///
/// Essentially performs this loop:
//
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator I(MI);

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Use a set to avoid extra readfirstlanes in the case where multiple operands
  // are the same register.
  SmallSet<Register, 4> SGPROperandRegs;
  // Collect only the listed operands that currently live in a VGPR; operands
  // already on a scalar bank need no per-lane read.
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  if (SGPROperandRegs.empty())
    return;

  MachineIRBuilder B(MI);
  SmallVector<Register, 4> ResultRegs;
  SmallVector<Register, 4> InitResultRegs;
  SmallVector<Register, 4> PhiRegs;
  // For each def of MI, set up an undef initial value and a phi register so
  // the result can be threaded through the loop's G_PHI below.
  for (MachineOperand &Def : MI.defs()) {
    LLT ResTy = MRI.getType(Def.getReg());
    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
    ResultRegs.push_back(Def.getReg());
    Register InitReg = B.buildUndef(ResTy).getReg(0);
    Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
    InitResultRegs.push_back(InitReg);
    PhiRegs.push_back(PhiReg);
    MRI.setRegBank(PhiReg, *DefBank);
    MRI.setRegBank(InitReg, *DefBank);
  }

  Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  // Resulting layout: MBB -> LoopBB -> RestoreExecBB -> RemainderBB.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Exec-mask phi: the initial (implicit_def) value on entry from MBB, the
  // updated mask on the back edge from LoopBB.
  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  // Move the instruction into the loop.
  LoopBB->splice(LoopBB->end(), &MBB, I);
  I = std::prev(LoopBB->end());

  B.setInstr(*I);

  // Accumulated "this lane matches all read values" condition; stays invalid
  // until the first readfirstlane is emitted.
  Register CondReg;

  for (MachineOperand &Op : MI.uses()) {
    if (!Op.isReg())
      continue;

    assert(!Op.isDef());
    if (SGPROperandRegs.count(Op.getReg())) {
      LLT OpTy = MRI.getType(Op.getReg());
      unsigned OpSize = OpTy.getSizeInBits();

      // Can only do a readlane of 32-bit pieces.
      if (OpSize == 32) {
        // Avoid extra copies in the simple case of one 32-bit register.
        Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        MRI.setType(CurrentLaneOpReg, OpTy);

        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
        // Read the next variant <- also loop target.
        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
          .addReg(Op.getReg());

        Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        bool First = CondReg == AMDGPU::NoRegister;
        if (First)
          CondReg = NewCondReg;

        // Compare the just read M0 value to all possible Idx values.
        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
          .addDef(NewCondReg)
          .addReg(CurrentLaneOpReg)
          .addReg(Op.getReg());
        Op.setReg(CurrentLaneOpReg);

        if (!First) {
          Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

          // If there are multiple operands to consider, and the conditions.
          B.buildInstr(AMDGPU::S_AND_B64)
            .addDef(AndReg)
            .addReg(NewCondReg)
            .addReg(CondReg);
          CondReg = AndReg;
        }
      } else {
        LLT S32 = LLT::scalar(32);
        SmallVector<Register, 8> ReadlanePieces;

        // The compares can be done as 64-bit, but the extract needs to be done
        // in 32-bit pieces.

        bool Is64 = OpSize % 64 == 0;

        LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
        unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
                                          : AMDGPU::V_CMP_EQ_U32_e64;

        // Insert the unmerge before the loop.

        B.setMBB(MBB);
        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
        B.setInstr(*I);

        // One def per piece, plus the source use.
        unsigned NumPieces = Unmerge->getNumOperands() - 1;
        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
          unsigned UnmergePiece = Unmerge.getReg(PieceIdx);

          Register CurrentLaneOpReg;
          if (Is64) {
            // Read a 64-bit piece as two 32-bit readfirstlanes, then re-merge
            // for the 64-bit compare.
            Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
            Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegLo)
              .addReg(UnmergePiece, 0, AMDGPU::sub0);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegHi)
              .addReg(UnmergePiece, 0, AMDGPU::sub1);

            CurrentLaneOpReg =
                B.buildMerge(LLT::scalar(64),
                             {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
                    .getReg(0);

            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

            if (OpTy.getScalarSizeInBits() == 64) {
              // If we need to produce a 64-bit element vector, so use the
              // merged pieces
              ReadlanePieces.push_back(CurrentLaneOpReg);
            } else {
              // 32-bit element type.
              ReadlanePieces.push_back(CurrentLaneOpRegLo);
              ReadlanePieces.push_back(CurrentLaneOpRegHi);
            }
          } else {
            // 32-bit piece: a single readfirstlane suffices.
            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpReg)
              .addReg(UnmergePiece);
            ReadlanePieces.push_back(CurrentLaneOpReg);
          }

          Register NewCondReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          B.buildInstr(CmpOp)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(UnmergePiece);

          if (!First) {
            Register AndReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

            // If there are multiple operands to consider, and the conditions.
            B.buildInstr(AMDGPU::S_AND_B64)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        }

        // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
        // BUILD_VECTOR
        if (OpTy.isVector()) {
          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        } else {
          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        }

        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
      }
    }
  }

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, save the original EXEC value to VCC.
  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(AMDGPU::S_XOR_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
    .addReg(AMDGPU::EXEC);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(AMDGPU::S_MOV_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(SaveExecReg);
}
908
909
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
910
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
911
52
    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
912
52
  Register Reg = MI.getOperand(OpIdx).getReg();
913
52
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
914
52
  if (Bank != &AMDGPU::VGPRRegBank)
915
28
    return;
916
24
917
24
  MachineIRBuilder B(MI);
918
24
  Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
919
24
  B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
920
24
    .addDef(SGPR)
921
24
    .addReg(Reg);
922
24
923
24
  const TargetRegisterClass *Constrained =
924
24
      constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
925
24
  (void)Constrained;
926
24
  assert(Constrained && "Failed to constrain readfirstlane src reg");
927
24
928
24
  MI.getOperand(OpIdx).setReg(SGPR);
929
24
}
930
931
// When regbankselect repairs registers, it will insert a repair instruction
932
// which defines the repaired register.  Then it calls applyMapping and expects
933
// that the targets will either delete or rewrite the originally wrote to the
934
// repaired registers.  Beccause of this, we end up in a situation where
935
// we have 2 instructions defining the same registers.
936
static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
937
                                     Register Reg,
938
32
                                     const MachineInstr &MI) {
939
32
  // Is there some way we can assert that there are exactly 2 def instructions?
940
32
  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
941
32
    if (&Other != &MI)
942
32
      return &Other;
943
32
  }
944
32
945
32
  
return nullptr0
;
946
32
}
947
948
// Break a wide (> 128-bit) VGPR-pointer load into 128-bit pieces via the
// legalizer, then wire the pieces back into the repair instruction inserted
// by RegBankSelect. Returns true if the load was rewritten, false if there
// was nothing to do (small load or SGPR pointer).
bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
                        const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
                                              MachineRegisterInfo &MRI) const {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy =  MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  const unsigned MaxNonSmrdLoadSize = 128;
  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));

  // If the pointer is an SGPR, we have nothing to do.
  if (SrcRegs.empty())
    return false;

  assert(LoadSize % MaxNonSmrdLoadSize == 0);

  // We want to get the repair instruction now, because it will help us
  // determine which instruction the legalizer inserts that will also
  // write to DstReg.
  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  MachineIRBuilder B(MI);

  // Split into as many 128-bit chunks as the element type allows; the
  // observer keeps every register the legalizer creates on the VGPR bank.
  unsigned SplitElts =
      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
  const LLT LoadSplitTy =  LLT::vector(SplitElts, LoadTy.getScalarType());
  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&O);
  B.setChangeObserver(Observer);
  LegalizerHelper Helper(B.getMF(), Observer, B);
  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
    return false;

  // At this point, the legalizer has split the original load into smaller
  // loads.  At the end of lowering, it inserts an instruction (LegalizedInst)
  // that combines the outputs of the lower loads and writes it to DstReg.
  // The register bank selector has also added the RepairInst which writes to
  // DstReg as well.

  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);

  // Replace the output of the LegalizedInst with a temporary register, since
  // RepairInst already defines DstReg.
  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
  LegalizedInst->getOperand(0).setReg(TmpReg);
  B.setInsertPt(*RepairInst->getParent(), RepairInst);

  // Feed each repair register from the corresponding element of the combined
  // legalized result.
  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    B.buildConstant(IdxReg, DefIdx);
    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
  }

  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
  return true;
}
1015
1016
// For cases where only a single copy is inserted for matching register banks.
1017
// Replace the register in the instruction operand
1018
static void substituteSimpleCopyRegs(
1019
34
  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
1020
34
  SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
1021
34
  if (!SrcReg.empty()) {
1022
14
    assert(SrcReg.size() == 1);
1023
14
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
1024
14
  }
1025
34
}
1026
1027
// Apply the selected register-bank mapping to the instruction, rewriting or
// expanding opcodes that need target-specific handling (64-bit splits,
// scalar min/max lowering, waterfall loops for SGPR-required operands, ...).
// Anything not handled here falls through to applyDefaultMapping.
void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    // 64-bit selects are rewritten as two 32-bit selects over split halves.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    // The condition operand is never split; take it from the repair register
    // if one exists, otherwise use the original operand.
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else {
      assert(Src0Regs.size() == 1);
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    // Select each half with the shared condition.
    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR input.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);
    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need to
    // extract the values.
    MachineIRBuilder B(MI);

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    // Emit the same bitwise op once per 32-bit half.
    B.buildInstr(Opc)
      .addDef(DefRegs[0])
      .addUse(Src0Regs[0])
      .addUse(Src1Regs[0]);

    B.buildInstr(Opc)
      .addDef(DefRegs[1])
      .addUse(Src0Regs[1])
      .addUse(Src1Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(16))
      break;

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
        LegalizerHelper::Legalized)
      llvm_unreachable("widen scalar should have succeeded");
    return;
  }
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    LLT S32 = LLT::scalar(32);
    LLT S16 = LLT::scalar(16);

    if (Ty == S16) {
      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    } else {
      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    }

    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    bool Signed = Opc == AMDGPU::G_SEXT;

    MachineIRBuilder B(MI);
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // Case 1: a VGPR extension to 64-bit from <= 32-bit. Extend the low half
    // to 32 bits and synthesize the high half directly.
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::SCCRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // FIXME: Should handle any type that round to s64 when irregular
        // breakdowns supported.
        DstTy.getSizeInBits() == 64 &&
        SrcTy.getSizeInBits() <= 32) {
      const LLT S32 = LLT::scalar(32);
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      // Extend to 32-bit, and then extend the low half.
      if (Signed) {
        // TODO: Should really be buildSExtOrCopy
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);

        // Replicate sign bit from 32-bit extended part.
        auto ShiftAmt = B.buildConstant(S32, 31);
        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
      } else {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
        B.buildConstant(DefRegs[1], 0);
      }

      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }

    if (SrcTy != LLT::scalar(1))
      return;

    // Case 2: extending a condition-register s1 — materialize via select of
    // constants.
    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();
      // 64-bit select is SGPR only
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SCCRegBankID;

      // TODO: Should s16 select be legal?
      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
        // VCC source with 64-bit dest: 32-bit select, then copy the low half
        // into the high half (zext high half is handled by the copy of 0/-1).
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        B.buildCopy(DefRegs[1], DefRegs[0]);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    // Fixup the case with an s1 src that isn't a condition register. Use shifts
    // instead of introducing a compare to avoid an unnecessary condition
    // register (and since there's no scalar 16-bit compares).
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

    if (MI.getOpcode() == AMDGPU::G_SEXT)
      B.buildAShr(DstReg, Shl, ShiftAmt);
    else
      B.buildLShr(DstReg, Shl, ShiftAmt);

    MRI.setRegBank(DstReg, *SrcBank);
    MRI.setRegBank(Ext.getReg(0), *SrcBank);
    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
    MRI.setRegBank(Shl.getReg(0), *SrcBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT:
    // The index operand must be an SGPR; waterfall if it's a VGPR.
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(MI, MRI, { 2 });
    return;
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    case Intrinsic::amdgcn_readlane: {
      substituteSimpleCopyRegs(OpdMapper, 2);

      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(3)));

      // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    case Intrinsic::amdgcn_writelane: {
      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(2)));
      assert(empty(OpdMapper.getVRegs(3)));

      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(empty(OpdMapper.getVRegs(0)));
      substituteSimpleCopyRegs(OpdMapper, 3);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // FIXME: Should this use a waterfall loop?
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_LOAD: {
    if (applyMappingWideLoad(MI, OpdMapper, MRI))
      return;
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}
1365
1366
564
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
1367
564
  const MachineFunction &MF = *MI.getParent()->getParent();
1368
564
  const MachineRegisterInfo &MRI = MF.getRegInfo();
1369
1.80k
  for (unsigned i = 0, e = MI.getNumOperands();i != e; 
++i1.24k
) {
1370
1.58k
    if (!MI.getOperand(i).isReg())
1371
4
      continue;
1372
1.58k
    Register Reg = MI.getOperand(i).getReg();
1373
1.58k
    if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
1374
942
      if (Bank->getID() == AMDGPU::VGPRRegBankID)
1375
340
        return false;
1376
602
1377
602
      assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
1378
602
             Bank->getID() == AMDGPU::SCCRegBankID);
1379
602
    }
1380
1.58k
  }
1381
564
  
return true224
;
1382
564
}
1383
1384
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
  // Map every operand onto a scalar bank: 1-bit values go to SCC, everything
  // else to SGPRs.
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const unsigned NumOps = MI.getNumOperands();
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);

  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
    unsigned Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
    unsigned BankID =
        (Size == 1) ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
1398
1399
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
  // Default vector-ALU mapping: the def is always a VGPR; the first use keeps
  // whatever bank it already has (defaulting to VCC for 1-bit values, VGPR
  // otherwise); remaining uses are forced to VCC/VGPR by size.
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());

  unsigned Idx = 0;

  // Result operand: always a VGPR.
  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  OpdsMapping[Idx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

  // Skip over an intrinsic ID operand if present (G_INTRINSIC-style).
  if (MI.getOperand(Idx).isIntrinsicID())
    OpdsMapping[Idx++] = nullptr;

  // First source: keep its current bank if it has one.
  Register FirstSrc = MI.getOperand(Idx).getReg();
  unsigned FirstSrcSize = getSizeInBits(FirstSrc, MRI, *TRI);
  unsigned FallbackBank = (FirstSrcSize == 1) ? AMDGPU::VCCRegBankID
                                              : AMDGPU::VGPRRegBankID;
  unsigned FirstSrcBank = getRegBankID(FirstSrc, MRI, *TRI, FallbackBank);
  OpdsMapping[Idx++] = AMDGPU::getValueMapping(FirstSrcBank, FirstSrcSize);

  // Remaining sources: VCC for 1-bit values, VGPR for everything else.
  for (unsigned E = MI.getNumOperands(); Idx != E; ++Idx) {
    const MachineOperand &MO = MI.getOperand(Idx);
    if (!MO.isReg())
      continue;

    unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
    unsigned BankID =
        (Size == 1) ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
1434
1435
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
  // Map every register operand, defs and uses alike, to the VGPR bank.
  // Non-register operands (immediates, intrinsic IDs, ...) are left unmapped.
  const MachineRegisterInfo &MRI =
      MI.getParent()->getParent()->getRegInfo();
  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());

  unsigned Idx = 0;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg()) {
      unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
      OpdsMapping[Idx] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    }
    ++Idx;
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
1453
1454
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
  // Choose between a scalar (SMRD) and vector load mapping based on whether
  // the instruction is uniform.
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());

  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  unsigned DstSize = getSizeInBits(DstReg, MRI, *TRI);
  LLT LoadTy = MRI.getType(DstReg);
  unsigned PtrSize = getSizeInBits(PtrReg, MRI, *TRI);

  const ValueMapping *ValMapping;
  const ValueMapping *PtrMapping;

  if (isInstrUniform(MI)) {
    // Uniform instruction: use an SMRD load, so both the value and the
    // pointer live in SGPRs.
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping =
        AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
    // FIXME: What would happen if we used SGPRRegBankID here?
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
1486
1487
unsigned
1488
AMDGPURegisterBankInfo::getRegBankID(Register Reg,
1489
                                     const MachineRegisterInfo &MRI,
1490
                                     const TargetRegisterInfo &TRI,
1491
2.64k
                                     unsigned Default) const {
1492
2.64k
1493
2.64k
  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
1494
2.64k
  return Bank ? 
Bank->getID()2.60k
:
Default48
;
1495
2.64k
}
1496
1497
///
1498
/// This function must return a legal mapping, because
1499
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
1500
/// in RegBankSelect::Mode::Fast.  Any mapping that would cause a
1501
/// VGPR to SGPR generated is illegal.
1502
///
1503
const RegisterBankInfo::InstructionMapping &
1504
6.45k
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
1505
6.45k
  const MachineFunction &MF = *MI.getParent()->getParent();
1506
6.45k
  const MachineRegisterInfo &MRI = MF.getRegInfo();
1507
6.45k
1508
6.45k
  if (MI.isRegSequence()) {
1509
16
    // If any input is a VGPR, the result must be a VGPR. The default handling
1510
16
    // assumes any copy between banks is legal.
1511
16
    unsigned BankID = AMDGPU::SGPRRegBankID;
1512
16
1513
28
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; 
I += 212
) {
1514
24
      auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
1515
24
      // It doesn't make sense to use vcc or scc banks here, so just ignore
1516
24
      // them.
1517
24
      if (OpBank != AMDGPU::SGPRRegBankID) {
1518
12
        BankID = AMDGPU::VGPRRegBankID;
1519
12
        break;
1520
12
      }
1521
24
    }
1522
16
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1523
16
1524
16
    const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
1525
16
    return getInstructionMapping(
1526
16
        1, /*Cost*/ 1,
1527
16
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1528
16
  }
1529
6.43k
1530
6.43k
  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
1531
6.43k
  // properly.
1532
6.43k
  //
1533
6.43k
  // TODO: There are additional exec masking dependencies to analyze.
1534
6.43k
  if (MI.getOpcode() == TargetOpcode::G_PHI) {
1535
96
    // TODO: Generate proper invalid bank enum.
1536
96
    int ResultBank = -1;
1537
96
1538
204
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; 
I += 2108
) {
1539
168
      unsigned Reg = MI.getOperand(I).getReg();
1540
168
      const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
1541
168
1542
168
      // FIXME: Assuming VGPR for any undetermined inputs.
1543
168
      if (!Bank || 
Bank->getID() == AMDGPU::VGPRRegBankID162
) {
1544
52
        ResultBank = AMDGPU::VGPRRegBankID;
1545
52
        break;
1546
52
      }
1547
116
1548
116
      unsigned OpBank = Bank->getID();
1549
116
      // scc, scc -> sgpr
1550
116
      if (OpBank == AMDGPU::SCCRegBankID) {
1551
40
        // There's only one SCC register, so a phi requires copying to SGPR.
1552
40
        OpBank = AMDGPU::SGPRRegBankID;
1553
76
      } else if (OpBank == AMDGPU::VCCRegBankID) {
1554
28
        // vcc, vcc -> vcc
1555
28
        // vcc, sgpr -> vgpr
1556
28
        if (ResultBank != -1 && 
ResultBank != AMDGPU::VCCRegBankID12
) {
1557
8
          ResultBank = AMDGPU::VGPRRegBankID;
1558
8
          break;
1559
8
        }
1560
108
      }
1561
108
1562
108
      ResultBank = OpBank;
1563
108
    }
1564
96
1565
96
    assert(ResultBank != -1);
1566
96
1567
96
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1568
96
1569
96
    const ValueMapping &ValMap =
1570
96
        getValueMapping(0, Size, getRegBank(ResultBank));
1571
96
    return getInstructionMapping(
1572
96
        1, /*Cost*/ 1,
1573
96
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
1574
96
  }
1575
6.34k
1576
6.34k
  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
1577
6.34k
  if (Mapping.isValid())
1578
3.57k
    return Mapping;
1579
2.76k
1580
2.76k
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
1581
2.76k
1582
2.76k
  switch (MI.getOpcode()) {
1583
2.76k
  default:
1584
2
    return getInvalidInstructionMapping();
1585
2.76k
1586
2.76k
  case AMDGPU::G_AND:
1587
234
  case AMDGPU::G_OR:
1588
234
  case AMDGPU::G_XOR: {
1589
234
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1590
234
    if (Size == 1) {
1591
60
      const RegisterBank *DstBank
1592
60
        = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
1593
60
1594
60
      unsigned TargetBankID = -1;
1595
60
      unsigned BankLHS = -1;
1596
60
      unsigned BankRHS = -1;
1597
60
      if (DstBank) {
1598
20
        TargetBankID = DstBank->getID();
1599
20
        if (DstBank == &AMDGPU::VCCRegBank) {
1600
8
          TargetBankID = AMDGPU::VCCRegBankID;
1601
8
          BankLHS = AMDGPU::VCCRegBankID;
1602
8
          BankRHS = AMDGPU::VCCRegBankID;
1603
12
        } else if (DstBank == &AMDGPU::SCCRegBank) {
1604
8
          TargetBankID = AMDGPU::SCCRegBankID;
1605
8
          BankLHS = AMDGPU::SGPRRegBankID;
1606
8
          BankRHS = AMDGPU::SGPRRegBankID;
1607
8
        } else {
1608
4
          BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1609
4
                                 AMDGPU::SGPRRegBankID);
1610
4
          BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1611
4
                                 AMDGPU::SGPRRegBankID);
1612
4
        }
1613
40
      } else {
1614
40
        BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1615
40
                               AMDGPU::VCCRegBankID);
1616
40
        BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1617
40
                               AMDGPU::VCCRegBankID);
1618
40
1619
40
        // Both inputs should be true booleans to produce a boolean result.
1620
40
        if (BankLHS == AMDGPU::VGPRRegBankID || 
BankRHS == AMDGPU::VGPRRegBankID36
) {
1621
6
          TargetBankID = AMDGPU::VGPRRegBankID;
1622
34
        } else if (BankLHS == AMDGPU::VCCRegBankID || 
BankRHS == AMDGPU::VCCRegBankID26
) {
1623
18
          TargetBankID = AMDGPU::VCCRegBankID;
1624
18
          BankLHS = AMDGPU::VCCRegBankID;
1625
18
          BankRHS = AMDGPU::VCCRegBankID;
1626
18
        } else 
if (16
BankLHS == AMDGPU::SGPRRegBankID16
&&
BankRHS == AMDGPU::SGPRRegBankID10
) {
1627
6
          TargetBankID = AMDGPU::SGPRRegBankID;
1628
10
        } else if (BankLHS == AMDGPU::SCCRegBankID || 
BankRHS == AMDGPU::SCCRegBankID4
) {
1629
10
          // The operation must be done on a 32-bit register, but it will set
1630
10
          // scc. The result type could interchangably be SCC or SGPR, since
1631
10
          // both values will be produced.
1632
10
          TargetBankID = AMDGPU::SCCRegBankID;
1633
10
          BankLHS = AMDGPU::SGPRRegBankID;
1634
10
          BankRHS = AMDGPU::SGPRRegBankID;
1635
10
        }
1636
40
      }
1637
60
1638
60
      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
1639
60
      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
1640
60
      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
1641
60
      break;
1642
60
    }
1643
174
1644
174
    if (Size == 64) {
1645
126
1646
126
      if (isSALUMapping(MI)) {
1647
24
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
1648
24
        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
1649
102
      } else {
1650
102
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
1651
102
        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
1652
102
        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
1653
102
1654
102
        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
1655
102
        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
1656
102
      }
1657
126
1658
126
      break;
1659
126
    }
1660
48
1661
48
    LLVM_FALLTHROUGH;
1662
48
  }
1663
48
1664
320
  case AMDGPU::G_GEP:
1665
320
  case AMDGPU::G_ADD:
1666
320
  case AMDGPU::G_SUB:
1667
320
  case AMDGPU::G_MUL:
1668
320
  case AMDGPU::G_SHL:
1669
320
  case AMDGPU::G_LSHR:
1670
320
  case AMDGPU::G_ASHR:
1671
320
  case AMDGPU::G_UADDO:
1672
320
  case AMDGPU::G_SADDO:
1673
320
  case AMDGPU::G_USUBO:
1674
320
  case AMDGPU::G_SSUBO:
1675
320
  case AMDGPU::G_UADDE:
1676
320
  case AMDGPU::G_SADDE:
1677
320
  case AMDGPU::G_USUBE:
1678
320
  case AMDGPU::G_SSUBE:
1679
320
  case AMDGPU::G_UMULH:
1680
320
  case AMDGPU::G_SMULH:
1681
320
  case AMDGPU::G_SMIN:
1682
320
  case AMDGPU::G_SMAX:
1683
320
  case AMDGPU::G_UMIN:
1684
320
  case AMDGPU::G_UMAX:
1685
320
    if (isSALUMapping(MI))
1686
173
      return getDefaultMappingSOP(MI);
1687
147
    LLVM_FALLTHROUGH;
1688
147
1689
235
  case AMDGPU::G_FADD:
1690
235
  case AMDGPU::G_FSUB:
1691
235
  case AMDGPU::G_FPTOSI:
1692
235
  case AMDGPU::G_FPTOUI:
1693
235
  case AMDGPU::G_FMUL:
1694
235
  case AMDGPU::G_FMA:
1695
235
  case AMDGPU::G_FSQRT:
1696
235
  case AMDGPU::G_SITOFP:
1697
235
  case AMDGPU::G_UITOFP:
1698
235
  case AMDGPU::G_FPTRUNC:
1699
235
  case AMDGPU::G_FPEXT:
1700
235
  case AMDGPU::G_FEXP2:
1701
235
  case AMDGPU::G_FLOG2:
1702
235
  case AMDGPU::G_FMINNUM:
1703
235
  case AMDGPU::G_FMAXNUM:
1704
235
  case AMDGPU::G_FMINNUM_IEEE:
1705
235
  case AMDGPU::G_FMAXNUM_IEEE:
1706
235
  case AMDGPU::G_FCANONICALIZE:
1707
235
  case AMDGPU::G_INTRINSIC_TRUNC:
1708
235
  case AMDGPU::G_INTRINSIC_ROUND:
1709
235
    return getDefaultMappingVOP(MI);
1710
235
  case AMDGPU::G_IMPLICIT_DEF: {
1711
22
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1712
22
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1713
22
    break;
1714
235
  }
1715
298
  case AMDGPU::G_FCONSTANT:
1716
298
  case AMDGPU::G_CONSTANT:
1717
298
  case AMDGPU::G_FRAME_INDEX:
1718
298
  case AMDGPU::G_BLOCK_ADDR: {
1719
298
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1720
298
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1721
298
    break;
1722
298
  }
1723
298
  case AMDGPU::G_INSERT: {
1724
10
    unsigned BankID = isSALUMapping(MI) ? 
AMDGPU::SGPRRegBankID4
:
1725
10
                                          
AMDGPU::VGPRRegBankID6
;
1726
10
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1727
10
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1728
10
    unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
1729
10
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1730
10
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1731
10
    OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
1732
10
    OpdsMapping[3] = nullptr;
1733
10
    break;
1734
298
  }
1735
298
  case AMDGPU::G_EXTRACT: {
1736
13
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1737
13
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1738
13
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1739
13
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1740
13
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1741
13
    OpdsMapping[2] = nullptr;
1742
13
    break;
1743
298
  }
1744
298
  case AMDGPU::G_MERGE_VALUES:
1745
80
  case AMDGPU::G_BUILD_VECTOR:
1746
80
  case AMDGPU::G_CONCAT_VECTORS: {
1747
80
    unsigned Bank = isSALUMapping(MI) ?
1748
62
      
AMDGPU::SGPRRegBankID18
: AMDGPU::VGPRRegBankID;
1749
80
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1750
80
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1751
80
1752
80
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1753
80
    // Op1 and Dst should use the same register bank.
1754
240
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; 
++i160
)
1755
160
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
1756
80
    break;
1757
80
  }
1758
80
  case AMDGPU::G_BITCAST:
1759
50
  case AMDGPU::G_INTTOPTR:
1760
50
  case AMDGPU::G_PTRTOINT:
1761
50
  case AMDGPU::G_CTLZ:
1762
50
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
1763
50
  case AMDGPU::G_CTTZ:
1764
50
  case AMDGPU::G_CTTZ_ZERO_UNDEF:
1765
50
  case AMDGPU::G_CTPOP:
1766
50
  case AMDGPU::G_BSWAP:
1767
50
  case AMDGPU::G_FABS:
1768
50
  case AMDGPU::G_FNEG: {
1769
50
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1770
50
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1771
50
    OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
1772
50
    break;
1773
50
  }
1774
280
  case AMDGPU::G_TRUNC: {
1775
280
    Register Dst = MI.getOperand(0).getReg();
1776
280
    Register Src = MI.getOperand(1).getReg();
1777
280
    unsigned Bank = getRegBankID(Src, MRI, *TRI);
1778
280
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1779
280
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1780
280
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1781
280
    OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
1782
280
    break;
1783
50
  }
1784
101
  case AMDGPU::G_ZEXT:
1785
101
  case AMDGPU::G_SEXT:
1786
101
  case AMDGPU::G_ANYEXT: {
1787
101
    Register Dst = MI.getOperand(0).getReg();
1788
101
    Register Src = MI.getOperand(1).getReg();
1789
101
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1790
101
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1791
101
1792
101
    unsigned DstBank;
1793
101
    const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
1794
101
    assert(SrcBank);
1795
101
    switch (SrcBank->getID()) {
1796
101
    case AMDGPU::SCCRegBankID:
1797
55
    case AMDGPU::SGPRRegBankID:
1798
55
      DstBank = AMDGPU::SGPRRegBankID;
1799
55
      break;
1800
55
    default:
1801
46
      DstBank = AMDGPU::VGPRRegBankID;
1802
46
      break;
1803
101
    }
1804
101
1805
101
    // TODO: Should anyext be split into 32-bit part as well?
1806
101
    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
1807
37
      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
1808
37
      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
1809
64
    } else {
1810
64
      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
1811
64
      // 32-bits, and then to 64.
1812
64
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
1813
64
      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
1814
64
                                                         SrcSize);
1815
64
    }
1816
101
    break;
1817
101
  }
1818
101
  case AMDGPU::G_FCMP: {
1819
6
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1820
6
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1821
6
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
1822
6
    OpdsMapping[1] = nullptr; // Predicate Operand.
1823
6
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1824
6
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1825
6
    break;
1826
101
  }
1827
102
  case AMDGPU::G_STORE: {
1828
102
    assert(MI.getOperand(0).isReg());
1829
102
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1830
102
    // FIXME: We need to specify a different reg bank once scalar stores
1831
102
    // are supported.
1832
102
    const ValueMapping *ValMapping =
1833
102
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1834
102
    // FIXME: Depending on the type of store, the pointer could be in
1835
102
    // the SGPR Reg bank.
1836
102
    // FIXME: Pointer size should be based on the address space.
1837
102
    const ValueMapping *PtrMapping =
1838
102
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
1839
102
1840
102
    OpdsMapping[0] = ValMapping;
1841
102
    OpdsMapping[1] = PtrMapping;
1842
102
    break;
1843
101
  }
1844
101
1845
460
  case AMDGPU::G_ICMP: {
1846
460
    auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1847
460
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1848
460
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1849
460
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1850
460
1851
460
    bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
1852
460
                     
Op3Bank == AMDGPU::SGPRRegBankID294
&&
1853
460
      
(270
Size == 32270
||
(14
Size == 6414
&&
1854
14
                      
(12
Pred == CmpInst::ICMP_EQ12
||
Pred == CmpInst::ICMP_NE8
) &&
1855
14
                      
MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()8
));
1856
460
1857
460
    unsigned Op0Bank = CanUseSCC ? 
AMDGPU::SCCRegBankID260
:
AMDGPU::VCCRegBankID200
;
1858
460
1859
460
    OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
1860
460
    OpdsMapping[1] = nullptr; // Predicate Operand.
1861
460
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1862
460
    OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
1863
460
    break;
1864
101
  }
1865
101
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
1866
10
    unsigned OutputBankID = isSALUMapping(MI) ?
1867
8
                            
AMDGPU::SGPRRegBankID2
: AMDGPU::VGPRRegBankID;
1868
10
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1869
10
    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1870
10
    unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1871
10
1872
10
    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1873
10
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1874
10
1875
10
    // The index can be either if the source vector is VGPR.
1876
10
    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1877
10
    break;
1878
101
  }
1879
101
  case AMDGPU::G_INSERT_VECTOR_ELT: {
1880
16
    unsigned OutputBankID = isSALUMapping(MI) ?
1881
14
      
AMDGPU::SGPRRegBankID2
: AMDGPU::VGPRRegBankID;
1882
16
1883
16
    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1884
16
    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1885
16
    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
1886
16
    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1887
16
    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1888
16
1889
16
    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1890
16
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1891
16
    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
1892
16
1893
16
    // The index can be either if the source vector is VGPR.
1894
16
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1895
16
    break;
1896
101
  }
1897
101
  case AMDGPU::G_UNMERGE_VALUES: {
1898
2
    unsigned Bank = isSALUMapping(MI) ? 
AMDGPU::SGPRRegBankID1
:
1899
2
      
AMDGPU::VGPRRegBankID1
;
1900
2
1901
2
    // Op1 and Dst should use the same register bank.
1902
2
    // FIXME: Shouldn't this be the default? Why do we need to handle this?
1903
8
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; 
++i6
) {
1904
6
      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1905
6
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
1906
6
    }
1907
2
    break;
1908
101
  }
1909
108
  case AMDGPU::G_INTRINSIC: {
1910
108
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1911
108
    default:
1912
0
      return getInvalidInstructionMapping();
1913
108
    case Intrinsic::amdgcn_div_fmas:
1914
24
    case Intrinsic::amdgcn_trig_preop:
1915
24
    case Intrinsic::amdgcn_sin:
1916
24
    case Intrinsic::amdgcn_cos:
1917
24
    case Intrinsic::amdgcn_log_clamp:
1918
24
    case Intrinsic::amdgcn_rcp:
1919
24
    case Intrinsic::amdgcn_rcp_legacy:
1920
24
    case Intrinsic::amdgcn_rsq:
1921
24
    case Intrinsic::amdgcn_rsq_legacy:
1922
24
    case Intrinsic::amdgcn_rsq_clamp:
1923
24
    case Intrinsic::amdgcn_ldexp:
1924
24
    case Intrinsic::amdgcn_frexp_mant:
1925
24
    case Intrinsic::amdgcn_frexp_exp:
1926
24
    case Intrinsic::amdgcn_fract:
1927
24
    case Intrinsic::amdgcn_cvt_pkrtz:
1928
24
    case Intrinsic::amdgcn_cvt_pknorm_i16:
1929
24
    case Intrinsic::amdgcn_cvt_pknorm_u16:
1930
24
    case Intrinsic::amdgcn_cvt_pk_i16:
1931
24
    case Intrinsic::amdgcn_cvt_pk_u16:
1932
24
    case Intrinsic::amdgcn_fmed3:
1933
24
    case Intrinsic::amdgcn_cubeid:
1934
24
    case Intrinsic::amdgcn_cubema:
1935
24
    case Intrinsic::amdgcn_cubesc:
1936
24
    case Intrinsic::amdgcn_cubetc:
1937
24
    case Intrinsic::amdgcn_sffbh:
1938
24
    case Intrinsic::amdgcn_fmad_ftz:
1939
24
    case Intrinsic::amdgcn_mbcnt_lo:
1940
24
    case Intrinsic::amdgcn_mbcnt_hi:
1941
24
    case Intrinsic::amdgcn_ubfe:
1942
24
    case Intrinsic::amdgcn_sbfe:
1943
24
    case Intrinsic::amdgcn_lerp:
1944
24
    case Intrinsic::amdgcn_sad_u8:
1945
24
    case Intrinsic::amdgcn_msad_u8:
1946
24
    case Intrinsic::amdgcn_sad_hi_u8:
1947
24
    case Intrinsic::amdgcn_sad_u16:
1948
24
    case Intrinsic::amdgcn_qsad_pk_u16_u8:
1949
24
    case Intrinsic::amdgcn_mqsad_pk_u16_u8:
1950
24
    case Intrinsic::amdgcn_mqsad_u32_u8:
1951
24
    case Intrinsic::amdgcn_cvt_pk_u8_f32:
1952
24
    case Intrinsic::amdgcn_alignbit:
1953
24
    case Intrinsic::amdgcn_alignbyte:
1954
24
    case Intrinsic::amdgcn_fdot2:
1955
24
    case Intrinsic::amdgcn_sdot2:
1956
24
    case Intrinsic::amdgcn_udot2:
1957
24
    case Intrinsic::amdgcn_sdot4:
1958
24
    case Intrinsic::amdgcn_udot4:
1959
24
    case Intrinsic::amdgcn_sdot8:
1960
24
    case Intrinsic::amdgcn_udot8:
1961
24
    case Intrinsic::amdgcn_fdiv_fast:
1962
24
    case Intrinsic::amdgcn_wwm:
1963
24
    case Intrinsic::amdgcn_wqm:
1964
24
      return getDefaultMappingVOP(MI);
1965
24
    case Intrinsic::amdgcn_ds_permute:
1966
8
    case Intrinsic::amdgcn_ds_bpermute:
1967
8
    case Intrinsic::amdgcn_update_dpp:
1968
8
      return getDefaultMappingAllVGPR(MI);
1969
8
    case Intrinsic::amdgcn_kernarg_segment_ptr:
1970
6
    case Intrinsic::amdgcn_s_getpc:
1971
6
    case Intrinsic::amdgcn_groupstaticsize: {
1972
6
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1973
6
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1974
6
      break;
1975
6
    }
1976
6
    case Intrinsic::amdgcn_wqm_vote: {
1977
6
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1978
6
      OpdsMapping[0] = OpdsMapping[2]
1979
6
        = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
1980
6
      break;
1981
6
    }
1982
8
    case Intrinsic::amdgcn_s_buffer_load: {
1983
8
      // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
1984
8
      Register RSrc = MI.getOperand(2).getReg();   // SGPR
1985
8
      Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
1986
8
1987
8
      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1988
8
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
1989
8
      unsigned Size3 = MRI.getType(Offset).getSizeInBits();
1990
8
1991
8
      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
1992
8
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
1993
8
1994
8
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
1995
8
      OpdsMapping[1] = nullptr; // intrinsic id
1996
8
1997
8
      // Lie and claim everything is legal, even though some need to be
1998
8
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
1999
8
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2000
8
      OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
2001
8
      OpdsMapping[4] = nullptr;
2002
8
      break;
2003
6
    }
2004
8
    case Intrinsic::amdgcn_div_scale: {
2005
8
      unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2006
8
      unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2007
8
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
2008
8
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
2009
8
2010
8
      unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
2011
8
      OpdsMapping[3] = AMDGPU::getValueMapping(
2012
8
        getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
2013
8
      OpdsMapping[4] = AMDGPU::getValueMapping(
2014
8
        getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
2015
8
2016
8
      break;
2017
6
    }
2018
8
    case Intrinsic::amdgcn_class: {
2019
8
      Register Src0Reg = MI.getOperand(2).getReg();
2020
8
      Register Src1Reg = MI.getOperand(3).getReg();
2021
8
      unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
2022
8
      unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
2023
8
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2024
8
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
2025
8
      OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
2026
8
                                               Src0Size);
2027
8
      OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
2028
8
                                               Src1Size);
2029
8
      break;
2030
6
    }
2031
16
    case Intrinsic::amdgcn_icmp:
2032
16
    case Intrinsic::amdgcn_fcmp: {
2033
16
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2034
16
      // This is not VCCRegBank because this is not used in boolean contexts.
2035
16
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2036
16
      unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2037
16
      unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2038
16
      unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2039
16
      OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
2040
16
      OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
2041
16
      break;
2042
16
    }
2043
16
    case Intrinsic::amdgcn_readlane: {
2044
8
      // This must be an SGPR, but accept a VGPR.
2045
8
      unsigned IdxReg = MI.getOperand(3).getReg();
2046
8
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2047
8
      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2048
8
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2049
8
      LLVM_FALLTHROUGH;
2050
8
    }
2051
12
    case Intrinsic::amdgcn_readfirstlane: {
2052
12
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2053
12
      unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2054
12
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2055
12
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2056
12
      break;
2057
8
    }
2058
10
    case Intrinsic::amdgcn_writelane: {
2059
10
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2060
10
      unsigned SrcReg = MI.getOperand(2).getReg();
2061
10
      unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
2062
10
      unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2063
10
      unsigned IdxReg = MI.getOperand(3).getReg();
2064
10
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2065
10
      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2066
10
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2067
10
2068
10
      // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
2069
10
      // to legalize.
2070
10
      OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
2071
10
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2072
10
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2073
10
      break;
2074
8
    }
2075
8
    case Intrinsic::amdgcn_if_break: {
2076
2
      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2077
2
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2078
2
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2079
2
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2080
2
      break;
2081
76
    }
2082
76
    }
2083
76
    break;
2084
76
  }
2085
98
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
2086
98
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
2087
98
    default:
2088
0
      return getInvalidInstructionMapping();
2089
98
    case Intrinsic::amdgcn_s_getreg:
2090
8
    case Intrinsic::amdgcn_s_memtime:
2091
8
    case Intrinsic::amdgcn_s_memrealtime:
2092
8
    case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
2093
8
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2094
8
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2095
8
      break;
2096
8
    }
2097
40
    case Intrinsic::amdgcn_ds_append:
2098
40
    case Intrinsic::amdgcn_ds_consume:
2099
40
    case Intrinsic::amdgcn_ds_fadd:
2100
40
    case Intrinsic::amdgcn_ds_fmin:
2101
40
    case Intrinsic::amdgcn_ds_fmax:
2102
40
    case Intrinsic::amdgcn_atomic_inc:
2103
40
    case Intrinsic::amdgcn_atomic_dec:
2104
40
      return getDefaultMappingAllVGPR(MI);
2105
40
    case Intrinsic::amdgcn_ds_ordered_add:
2106
16
    case Intrinsic::amdgcn_ds_ordered_swap: {
2107
16
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2108
16
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2109
16
      unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2110
16
                                 AMDGPU::SGPRRegBankID);
2111
16
      OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
2112
16
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2113
16
      break;
2114
16
    }
2115
16
    case Intrinsic::amdgcn_exp_compr:
2116
4
      OpdsMapping[0] = nullptr; // IntrinsicID
2117
4
      // FIXME: These are immediate values which can't be read from registers.
2118
4
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2119
4
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2120
4
      // FIXME: Could we support packed types here?
2121
4
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2122
4
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2123
4
      // FIXME: These are immediate values which can't be read from registers.
2124
4
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2125
4
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2126
4
      break;
2127
16
    case Intrinsic::amdgcn_exp:
2128
4
      OpdsMapping[0] = nullptr; // IntrinsicID
2129
4
      // FIXME: These are immediate values which can't be read from registers.
2130
4
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2131
4
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2132
4
      // FIXME: Could we support packed types here?
2133
4
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2134
4
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2135
4
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2136
4
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2137
4
      // FIXME: These are immediate values which can't be read from registers.
2138
4
      OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2139
4
      OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2140
4
      break;
2141
16
    case Intrinsic::amdgcn_buffer_load: {
2142
16
      Register RSrc = MI.getOperand(2).getReg();   // SGPR
2143
16
      Register VIndex = MI.getOperand(3).getReg(); // VGPR
2144
16
      Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
2145
16
2146
16
      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2147
16
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2148
16
      unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
2149
16
      unsigned Size4 = MRI.getType(Offset).getSizeInBits();
2150
16
2151
16
      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2152
16
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2153
16
2154
16
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
2155
16
      OpdsMapping[1] = nullptr; // intrinsic id
2156
16
2157
16
      // Lie and claim everything is legal, even though some need to be
2158
16
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2159
16
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2160
16
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
2161
16
      OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
2162
16
      OpdsMapping[5] = nullptr;
2163
16
      OpdsMapping[6] = nullptr;
2164
16
      break;
2165
16
    }
2166
16
    case Intrinsic::amdgcn_s_sendmsg:
2167
8
    case Intrinsic::amdgcn_s_sendmsghalt: {
2168
8
      // This must be an SGPR, but accept a VGPR.
2169
8
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2170
8
                                   AMDGPU::SGPRRegBankID);
2171
8
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
2172
8
      break;
2173
8
    }
2174
8
    case Intrinsic::amdgcn_end_cf: {
2175
2
      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2176
2
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2177
2
      break;
2178
58
    }
2179
58
    }
2180
58
    break;
2181
58
  }
2182
160
  case AMDGPU::G_SELECT: {
2183
160
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2184
160
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2185
160
                                    AMDGPU::SGPRRegBankID);
2186
160
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
2187
160
                                    AMDGPU::SGPRRegBankID);
2188
160
    bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
2189
160
                    
Op3Bank == AMDGPU::SGPRRegBankID114
;
2190
160
2191
160
    unsigned CondBankDefault = SGPRSrcs ?
2192
96
      
AMDGPU::SCCRegBankID64
: AMDGPU::VCCRegBankID;
2193
160
    unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2194
160
                                     CondBankDefault);
2195
160
    if (CondBank == AMDGPU::SGPRRegBankID)
2196
36
      CondBank = SGPRSrcs ? 
AMDGPU::SCCRegBankID26
:
AMDGPU::VCCRegBankID10
;
2197
124
    else if (CondBank == AMDGPU::VGPRRegBankID)
2198
40
      CondBank = AMDGPU::VCCRegBankID;
2199
160
2200
160
    unsigned Bank = SGPRSrcs && 
CondBank == AMDGPU::SCCRegBankID64
?
2201
124
      
AMDGPU::SGPRRegBankID36
: AMDGPU::VGPRRegBankID;
2202
160
2203
160
    assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
2204
160
2205
160
    if (Size == 64) {
2206
64
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2207
64
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2208
64
      OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2209
64
      OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2210
96
    } else {
2211
96
      OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
2212
96
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2213
96
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
2214
96
      OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
2215
96
    }
2216
160
2217
160
    break;
2218
58
  }
2219
58
2220
172
  case AMDGPU::G_LOAD:
2221
172
    return getInstrMappingForLoad(MI);
2222
58
2223
66
  case AMDGPU::G_ATOMICRMW_XCHG:
2224
66
  case AMDGPU::G_ATOMICRMW_ADD:
2225
66
  case AMDGPU::G_ATOMICRMW_SUB:
2226
66
  case AMDGPU::G_ATOMICRMW_AND:
2227
66
  case AMDGPU::G_ATOMICRMW_OR:
2228
66
  case AMDGPU::G_ATOMICRMW_XOR:
2229
66
  case AMDGPU::G_ATOMICRMW_MAX:
2230
66
  case AMDGPU::G_ATOMICRMW_MIN:
2231
66
  case AMDGPU::G_ATOMICRMW_UMAX:
2232
66
  case AMDGPU::G_ATOMICRMW_UMIN:
2233
66
  case AMDGPU::G_ATOMIC_CMPXCHG: {
2234
66
    return getDefaultMappingAllVGPR(MI);
2235
66
  }
2236
118
  case AMDGPU::G_BRCOND: {
2237
118
    unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
2238
118
                                 AMDGPU::SGPRRegBankID);
2239
118
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
2240
118
    if (Bank != AMDGPU::SCCRegBankID)
2241
20
      Bank = AMDGPU::VCCRegBankID;
2242
118
2243
118
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
2244
118
    break;
2245
2.04k
  }
2246
2.04k
  }
2247
2.04k
2248
2.04k
  return getInstructionMapping(/*ID*/1, /*Cost*/1,
2249
2.04k
                               getOperandsMapping(OpdsMapping),
2250
2.04k
                               MI.getNumOperands());
2251
2.04k
}
2252