Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}
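
// A quick orientation (illustrative, not normative): scalar conditions live in
// SCC and are produced by S_CMP_*, while per-lane vector conditions live in
// VCC, a wave-wide mask (64-bit in wave64, 32-bit in wave32). For example, a
// virtual register with class SReg_64_XEXEC and type s1 would be treated as
// VCC here, while an SReg_32_XM0 register with type s1 may be SCC; the FIXME
// above notes that wave32 makes the 32-bit case ambiguous.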

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
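
// Illustrative example (approximate MIR, for exposition only): a copy of a
// 1-bit scalar value into a VCC-bank vreg cannot remain a plain COPY, since
// the bit must be broadcast to all lanes. The code above materializes it as
// a compare:
//   %dst:sreg_64(s1) = COPY %src:sgpr(s1)
// becomes
//   %dst:sreg_64 = V_CMP_NE_U32_e64 0, %src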

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
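
// Illustrative example (approximate MIR, for exposition only): asking for the
// sub0 half of a 64-bit register operand %x emits
//   %lo:sgpr_32 = COPY %x.sub0
// and returns %lo as a fresh 32-bit operand; for an immediate operand the
// value is instead split arithmetically into its low or high 32 bits.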

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing the register bank for the result
    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
    // be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
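
// Illustrative example (approximate MIR, for exposition only): a 64-bit
// scalar add is decomposed by the code above into an add / add-with-carry
// pair over the 32-bit halves, then reassembled:
//   %d:sgpr(s64) = G_ADD %a, %b
// becomes
//   %lo:sreg_32 = S_ADD_U32 %a.sub0, %b.sub0   ; implicit-def $scc
//   %hi:sreg_32 = S_ADDC_U32 %a.sub1, %b.sub1  ; implicit $scc
//   %d:sreg_64_xexec = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1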

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
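
// Illustrative example (approximate MIR, for exposition only): merges and
// vector builds become a single subregister assembly:
//   %v:vgpr(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
// becomes
//   %v:vreg_64 = REG_SEQUENCE %a, %subreg.sub0, %b, %subreg.sub1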

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
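
// Illustrative example (approximate MIR, for exposition only): the inverse of
// the merge case above, lowered to one subregister copy per result:
//   %a(s32), %b(s32) = G_UNMERGE_VALUES %x(s64)
// becomes
//   %a:sgpr_32 = COPY %x.sub0
//   %b:sgpr_32 = COPY %x.sub1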

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg())
                               .addReg(I.getOperand(2).getReg())
                               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
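
// Illustrative example (approximate MIR, for exposition only): a compare whose
// result lands on the SCC bank selects to a scalar compare plus a copy out of
// the physical flag register,
//   %c(s1) = G_ICMP intpred(eq), %a(s32), %b
// becomes
//   S_CMP_EQ_U32 %a, %b   ; implicit-def $scc
//   %c:sreg_32 = COPY $scc
// while a VCC-bank result selects directly to V_CMP_EQ_U32_e64.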

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it, so we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
  if (PtrSize != 64) {
    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
    return false;
  }

  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Remove this when integers > s32 are naturally selected.
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0)  // slc
          .addImm(0); // dlc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
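
// Worked examples for the helper above: Size = 4 gives Mask = 0xF = 15, which
// fits the inline-immediate range [-16, 64], so an AND is profitable. Size = 16
// gives Mask = 0xFFFF = 65535, which does not fit, so callers fall back to a
// BFE. Size = 32 gives Mask = 0xFFFFFFFF, i.e. SignedMask = -1, which is
// inline again.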

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
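
// Illustrative example (approximate MIR, for exposition only): a scalar zext
// of a 16-bit value takes the S_BFE path above, packing offset 0 and width 16
// into the single control operand:
//   %d:sgpr(s32) = G_ZEXT %s(s16)
// becomes
//   %d:sreg_32 = S_BFE_U32 %s, 16 << 16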

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
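
// Illustrative example (approximate MIR, for exposition only): a 64-bit
// constant is materialized as two 32-bit moves plus a REG_SEQUENCE; e.g. for
// an SGPR destination,
//   %c:sgpr(s64) = G_CONSTANT i64 0x100000001
// becomes
//   %lo:sreg_32_xm0 = S_MOV_B32 1
//   %hi:sreg_32_xm0 = S_MOV_B32 1
//   %c = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1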
1170
1171
546
static bool isConstant(const MachineInstr &MI) {
1172
546
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
1173
546
}
1174
1175
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
1176
972
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
1177
972
1178
972
  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
1179
972
1180
972
  assert(PtrMI);
1181
972
1182
972
  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
1183
699
    return;
1184
273
1185
273
  GEPInfo GEPInfo(*PtrMI);
1186
273
1187
819
  for (unsigned i = 1, e = 3; i < e; 
++i546
) {
1188
546
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
1189
546
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
1190
546
    assert(OpDef);
1191
546
    if (isConstant(*OpDef)) {
1192
273
      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
1193
273
      // are lacking other optimizations.
1194
273
      assert(GEPInfo.Imm == 0);
1195
273
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
1196
273
      continue;
1197
273
    }
1198
273
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
1199
273
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
1200
273
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
1201
0
    else
1202
0
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
1203
273
  }
1204
273
1205
273
  AddrInfo.push_back(GEPInfo);
1206
273
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
1207
273
}
1208
1209
396
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
1210
396
  if (!MI.hasOneMemOperand())
1211
0
    return false;
1212
396
1213
396
  const MachineMemOperand *MMO = *MI.memoperands_begin();
1214
396
  const Value *Ptr = MMO->getValue();
1215
396
1216
396
  // UndefValue means this is a load of a kernel input.  These are uniform.
1217
396
  // Sometimes LDS instructions have constant pointers.
1218
396
  // If Ptr is null, then that means this mem operand contains a
1219
396
  // PseudoSourceValue like GOT.
1220
396
  if (!Ptr || 
isa<UndefValue>(Ptr)214
||
isa<Argument>(Ptr)214
||
1221
396
      
isa<Constant>(Ptr)151
||
isa<GlobalValue>(Ptr)151
)
1222
245
    return true;
1223
151
1224
151
  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
1225
0
    return true;
1226
151
1227
151
  const Instruction *I = dyn_cast<Instruction>(Ptr);
1228
151
  return I && I->getMetadata("amdgpu.uniform");
1229
151
}
1230
1231
396
bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
1232
396
  for (const GEPInfo &GEPInfo : AddrInfo) {
1233
141
    if (!GEPInfo.VgprParts.empty())
1234
0
      return true;
1235
141
  }
1236
396
  return false;
1237
396
}
1238
1239
0
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
1240
0
  // TODO: Can/should we insert m0 initialization here for DS instructions and
1241
0
  // call the normal selector?
1242
0
  return false;
1243
0
}
1244
1245
26
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
1246
26
  MachineBasicBlock *BB = I.getParent();
1247
26
  MachineFunction *MF = BB->getParent();
1248
26
  MachineRegisterInfo &MRI = MF->getRegInfo();
1249
26
  MachineOperand &CondOp = I.getOperand(0);
1250
26
  Register CondReg = CondOp.getReg();
1251
26
  const DebugLoc &DL = I.getDebugLoc();
1252
26
1253
26
  unsigned BrOpcode;
1254
26
  Register CondPhysReg;
1255
26
  const TargetRegisterClass *ConstrainRC;
1256
26
1257
26
  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1258
26
  // whether the branch is uniform when selecting the instruction. In
1259
26
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
1260
26
  // RegBankSelect knows what it's doing if the branch condition is scc, even
1261
26
  // though it currently does not.
1262
26
  if (isSCC(CondReg, MRI)) {
1263
21
    CondPhysReg = AMDGPU::SCC;
1264
21
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
1265
21
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
1266
21
  } else 
if (5
isVCC(CondReg, MRI)5
) {
1267
3
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1268
3
    // We sort of know that a VCC producer based on the register bank, that ands
1269
3
    // inactive lanes with 0. What if there was a logical operation with vcc
1270
3
    // producers in different blocks/with different exec masks?
1271
3
    // FIXME: Should scc->vcc copies and with exec?
1272
3
    CondPhysReg = TRI.getVCC();
1273
3
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
1274
3
    ConstrainRC = TRI.getBoolRC();
1275
3
  } else
1276
2
    return false;
1277
24
1278
24
  if (!MRI.getRegClassOrNull(CondReg))
1279
24
    MRI.setRegClass(CondReg, ConstrainRC);
1280
24
1281
24
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
1282
24
    .addReg(CondReg);
1283
24
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
1284
24
    .addMBB(I.getOperand(1).getMBB());
1285
24
1286
24
  I.eraseFromParent();
1287
24
  return true;
1288
24
}
1289
1290
6
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
1291
6
  MachineBasicBlock *BB = I.getParent();
1292
6
  MachineFunction *MF = BB->getParent();
1293
6
  MachineRegisterInfo &MRI = MF->getRegInfo();
1294
6
1295
6
  Register DstReg = I.getOperand(0).getReg();
1296
6
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
1297
6
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1298
6
  I.setDesc(TII.get(IsVGPR ? 
AMDGPU::V_MOV_B32_e325
:
AMDGPU::S_MOV_B321
));
1299
6
  if (IsVGPR)
1300
5
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
1301
6
1302
6
  return RBI.constrainGenericRegister(
1303
6
    DstReg, IsVGPR ? 
AMDGPU::VGPR_32RegClass5
:
AMDGPU::SReg_32RegClass1
, MRI);
1304
6
}
1305
1306
bool AMDGPUInstructionSelector::select(MachineInstr &I,
1307
6.33k
                                       CodeGenCoverage &CoverageInfo) const {
1308
6.33k
  if (I.isPHI())
1309
10
    return selectPHI(I);
1310
6.32k
1311
6.32k
  if (!isPreISelGenericOpcode(I.getOpcode())) {
1312
3.64k
    if (I.isCopy())
1313
2.78k
      return selectCOPY(I);
1314
854
    return true;
1315
854
  }
1316
2.67k
1317
2.67k
  switch (I.getOpcode()) {
1318
2.67k
  case TargetOpcode::G_AND:
1319
114
  case TargetOpcode::G_OR:
1320
114
  case TargetOpcode::G_XOR:
1321
114
    if (selectG_AND_OR_XOR(I))
1322
72
      return true;
1323
42
    return selectImpl(I, CoverageInfo);
1324
42
  case TargetOpcode::G_ADD:
1325
28
  case TargetOpcode::G_SUB:
1326
28
    if (selectG_ADD_SUB(I))
1327
28
      return true;
1328
0
    LLVM_FALLTHROUGH;
1329
354
  default:
1330
354
    return selectImpl(I, CoverageInfo);
1331
17
  case TargetOpcode::G_INTTOPTR:
1332
17
  case TargetOpcode::G_BITCAST:
1333
17
    return selectCOPY(I);
1334
179
  case TargetOpcode::G_CONSTANT:
1335
179
  case TargetOpcode::G_FCONSTANT:
1336
179
    return selectG_CONSTANT(I);
1337
179
  case TargetOpcode::G_EXTRACT:
1338
16
    return selectG_EXTRACT(I);
1339
179
  case TargetOpcode::G_MERGE_VALUES:
1340
62
  case TargetOpcode::G_BUILD_VECTOR:
1341
62
  case TargetOpcode::G_CONCAT_VECTORS:
1342
62
    return selectG_MERGE_VALUES(I);
1343
62
  case TargetOpcode::G_UNMERGE_VALUES:
1344
11
    return selectG_UNMERGE_VALUES(I);
1345
165
  case TargetOpcode::G_GEP:
1346
165
    return selectG_GEP(I);
1347
62
  case TargetOpcode::G_IMPLICIT_DEF:
1348
38
    return selectG_IMPLICIT_DEF(I);
1349
62
  case TargetOpcode::G_INSERT:
1350
16
    return selectG_INSERT(I);
1351
62
  case TargetOpcode::G_INTRINSIC:
1352
11
    return selectG_INTRINSIC(I, CoverageInfo);
1353
62
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1354
7
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
1355
185
  case TargetOpcode::G_ICMP:
1356
185
    if (selectG_ICMP(I))
1357
178
      return true;
1358
7
    return selectImpl(I, CoverageInfo);
1359
508
  case TargetOpcode::G_LOAD:
1360
508
    return selectImpl(I, CoverageInfo);
1361
98
  case TargetOpcode::G_SELECT:
1362
98
    return selectG_SELECT(I);
1363
428
  case TargetOpcode::G_STORE:
1364
428
    if (selectImpl(I, CoverageInfo))
1365
281
      return true;
1366
147
    return selectG_STORE(I);
1367
147
  case TargetOpcode::G_TRUNC:
1368
126
    return selectG_TRUNC(I);
1369
147
  case TargetOpcode::G_SEXT:
1370
83
  case TargetOpcode::G_ZEXT:
1371
83
  case TargetOpcode::G_ANYEXT:
1372
83
    if (selectG_SZA_EXT(I)) {
1373
83
      I.eraseFromParent();
1374
83
      return true;
1375
83
    }
1376
0
1377
0
    return false;
1378
26
  case TargetOpcode::G_BRCOND:
1379
26
    return selectG_BRCOND(I);
1380
6
  case TargetOpcode::G_FRAME_INDEX:
1381
6
    return selectG_FRAME_INDEX(I);
1382
200
  case TargetOpcode::G_FENCE:
1383
200
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
1384
200
    // is checking for G_CONSTANT
1385
200
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
1386
200
    return true;
1387
0
  }
1388
0
  return false;
1389
0
}
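// Editor's note, not in the original source: the switch above mixes two
// fallback orders. For example, G_AND tries the hand-written selector first:
//   if (selectG_AND_OR_XOR(I))
//     return true;
//   return selectImpl(I, CoverageInfo);
// while G_STORE tries the TableGen-imported patterns (selectImpl) first and
// only falls back to selectG_STORE when they fail.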
1390
1391
InstructionSelector::ComplexRendererFns
1392
0
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1393
0
  return {{
1394
0
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1395
0
  }};
1396
0
1397
0
}
1398
1399
std::pair<Register, unsigned>
1400
AMDGPUInstructionSelector::selectVOP3ModsImpl(
1401
198
  Register Src, const MachineRegisterInfo &MRI) const {
1402
198
  unsigned Mods = 0;
1403
198
  MachineInstr *MI = MRI.getVRegDef(Src);
1404
198
1405
198
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1406
19
    Src = MI->getOperand(1).getReg();
1407
19
    Mods |= SISrcMods::NEG;
1408
19
    MI = MRI.getVRegDef(Src);
1409
19
  }
1410
198
1411
198
  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1412
13
    Src = MI->getOperand(1).getReg();
1413
13
    Mods |= SISrcMods::ABS;
1414
13
  }
1415
198
1416
198
  return std::make_pair(Src, Mods);
1417
198
}
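// Editor's sketch, not part of the original source: the walk above folds at
// most one G_FNEG followed by one G_FABS into VOP3 source modifiers. For
//   %a:vgpr(s32) = G_FABS %x
//   %b:vgpr(s32) = G_FNEG %a
// selectVOP3ModsImpl(%b, MRI) returns {%x, SISrcMods::NEG | SISrcMods::ABS},
// so the selected VOP3 instruction reads %x directly with both modifier
// bits set instead of materializing the fneg/fabs.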
1418
1419
///
1420
/// This will select either an SGPR or VGPR operand and will save us from
1421
/// having to write an extra tablegen pattern.
1422
InstructionSelector::ComplexRendererFns
1423
2
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1424
2
  return {{
1425
2
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1426
2
  }};
1427
2
}
1428
1429
InstructionSelector::ComplexRendererFns
1430
94
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
1431
94
  MachineRegisterInfo &MRI
1432
94
    = Root.getParent()->getParent()->getParent()->getRegInfo();
1433
94
1434
94
  Register Src;
1435
94
  unsigned Mods;
1436
94
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1437
94
1438
94
  return {{
1439
94
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1440
94
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1441
94
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
1442
94
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
1443
94
  }};
1444
94
}
1445
InstructionSelector::ComplexRendererFns
1446
2
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1447
2
  return {{
1448
2
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1449
2
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1450
2
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
1451
2
  }};
1452
2
}
1453
1454
InstructionSelector::ComplexRendererFns
1455
104
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
1456
104
  MachineRegisterInfo &MRI
1457
104
    = Root.getParent()->getParent()->getParent()->getRegInfo();
1458
104
1459
104
  Register Src;
1460
104
  unsigned Mods;
1461
104
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
1462
104
1463
104
  return {{
1464
110
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1465
110
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
1466
104
  }};
1467
104
}
1468
1469
InstructionSelector::ComplexRendererFns
1470
174
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1471
174
  MachineRegisterInfo &MRI =
1472
174
      Root.getParent()->getParent()->getParent()->getRegInfo();
1473
174
1474
174
  SmallVector<GEPInfo, 4> AddrInfo;
1475
174
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1476
174
1477
174
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1478
72
    return None;
1479
102
1480
102
  const GEPInfo &GEPInfo = AddrInfo[0];
1481
102
1482
102
  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1483
25
    return None;
1484
77
1485
77
  unsigned PtrReg = GEPInfo.SgprParts[0];
1486
77
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1487
77
  return {{
1488
77
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1489
77
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1490
77
  }};
1491
77
}
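// Editor's sketch, not in the original source: for a scalar load whose
// address is, say,
//   %ptr = G_GEP %sgpr_base, %c   ; %c = G_CONSTANT i64 16
// getAddrModeInfo() yields {SgprParts = [%sgpr_base], Imm = 16}. If 16 is a
// legal SMRD immediate for this subtarget, the two renderers above fill in
// %sgpr_base and the encoded form of 16 as the operands of an S_LOAD_*_IMM
// pattern.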
1492
1493
InstructionSelector::ComplexRendererFns
1494
41
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1495
41
  MachineRegisterInfo &MRI =
1496
41
      Root.getParent()->getParent()->getParent()->getRegInfo();
1497
41
1498
41
  SmallVector<GEPInfo, 4> AddrInfo;
1499
41
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
1500
41
1501
41
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1502
30
    return None;
1503
11
1504
11
  const GEPInfo &GEPInfo = AddrInfo[0];
1505
11
  unsigned PtrReg = GEPInfo.SgprParts[0];
1506
11
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1507
11
  if (!isUInt<32>(EncodedImm))
1508
2
    return None;
1509
9
1510
9
  return {{
1511
9
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1512
9
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1513
9
  }};
1514
9
}
1515
1516
InstructionSelector::ComplexRendererFns
1517
88
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
1518
88
  MachineInstr *MI = Root.getParent();
1519
88
  MachineBasicBlock *MBB = MI->getParent();
1520
88
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1521
88
1522
88
  SmallVector<GEPInfo, 4> AddrInfo;
1523
88
  getAddrModeInfo(*MI, MRI, AddrInfo);
1524
88
1525
88
  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
1526
88
  // then we could select all ptr + 32-bit offsets, not just immediate offsets.
1527
88
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1528
72
    return None;
1529
16
1530
16
  const GEPInfo &GEPInfo = AddrInfo[0];
1531
16
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1532
9
    return None;
1533
7
1534
7
  // If we make it this far we have a load with a 32-bit immediate offset.
1535
7
  // It is OK to select this using an SGPR offset, because we have already
1536
7
  // failed trying to select this load into one of the _IMM variants since
1537
7
  // the _IMM Patterns are considered before the _SGPR patterns.
1538
7
  unsigned PtrReg = GEPInfo.SgprParts[0];
1539
7
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1540
7
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1541
7
          .addImm(GEPInfo.Imm);
1542
7
  return {{
1543
7
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1544
7
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1545
7
  }};
1546
7
}
1547
1548
template <bool Signed>
1549
InstructionSelector::ComplexRendererFns
1550
413
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
1551
413
  MachineInstr *MI = Root.getParent();
1552
413
  MachineBasicBlock *MBB = MI->getParent();
1553
413
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1554
413
1555
413
  InstructionSelector::ComplexRendererFns Default = {{
1556
413
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1557
413
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
1558
413
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
1559
413
    }};
1560
413
1561
413
  if (!STI.hasFlatInstOffsets())
1562
276
    return Default;
1563
137
1564
137
  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
1565
137
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
1566
85
    return Default;
1567
52
1568
52
  Optional<int64_t> Offset =
1569
52
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
1570
52
  if (!Offset.hasValue())
1571
0
    return Default;
1572
52
1573
52
  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
1574
52
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
1575
36
    return Default;
1576
16
1577
16
  Register BasePtr = OpDef->getOperand(1).getReg();
1578
16
1579
16
  return {{
1580
16
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
1581
16
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
1582
16
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
1583
16
    }};
1584
16
}
1585
1586
InstructionSelector::ComplexRendererFns
1587
326
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
1588
326
  return selectFlatOffsetImpl<false>(Root);
1589
326
}
1590
1591
InstructionSelector::ComplexRendererFns
1592
87
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
1593
87
  return selectFlatOffsetImpl<true>(Root);
1594
87
}
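// Editor's sketch, not in the original source: on subtargets where
// hasFlatInstOffsets() is true, an access through
//   %ptr = G_GEP %base, %c   ; %c = G_CONSTANT i64 40
// folds the constant into the instruction's offset field whenever
// isLegalFLATOffset() accepts it, yielding roughly
//   GLOBAL_LOAD_DWORD %dst, %base, 40, 0
// while older subtargets always take the zero-offset Default renderers.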
1595
1596
// FIXME: Implement
1597
static bool signBitIsZero(const MachineOperand &Op,
1598
5
                          const MachineRegisterInfo &MRI) {
1599
5
  return false;
1600
5
}
1601
1602
12
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1603
12
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1604
12
  return PSV && PSV->isStack();
1605
12
}
1606
1607
InstructionSelector::ComplexRendererFns
1608
52
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
1609
52
  MachineInstr *MI = Root.getParent();
1610
52
  MachineBasicBlock *MBB = MI->getParent();
1611
52
  MachineFunction *MF = MBB->getParent();
1612
52
  MachineRegisterInfo &MRI = MF->getRegInfo();
1613
52
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1614
52
1615
52
  int64_t Offset = 0;
1616
52
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
1617
4
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1618
4
1619
4
    // TODO: Should this be inside the render function? The iterator seems to
1620
4
    // move.
1621
4
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
1622
4
            HighBits)
1623
4
      .addImm(Offset & ~4095);
1624
4
1625
4
    return {{[=](MachineInstrBuilder &MIB) { // rsrc
1626
4
               MIB.addReg(Info->getScratchRSrcReg());
1627
4
             },
1628
4
             [=](MachineInstrBuilder &MIB) { // vaddr
1629
4
               MIB.addReg(HighBits);
1630
4
             },
1631
4
             [=](MachineInstrBuilder &MIB) { // soffset
1632
4
               const MachineMemOperand *MMO = *MI->memoperands_begin();
1633
4
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
1634
4
1635
4
               Register SOffsetReg = isStackPtrRelative(PtrInfo)
1636
4
                                         ? Info->getStackPtrOffsetReg()
1637
4
                                         : Info->getScratchWaveOffsetReg();
1638
4
               MIB.addReg(SOffsetReg);
1639
4
             },
1640
4
             [=](MachineInstrBuilder &MIB) { // offset
1641
4
               MIB.addImm(Offset & 4095);
1642
4
             }}};
1643
4
  }
1644
48
1645
48
  assert(Offset == 0);
1646
48
1647
48
  // Try to fold a frame index directly into the MUBUF vaddr field, and any
1648
48
  // offsets.
1649
48
  Optional<int> FI;
1650
48
  Register VAddr = Root.getReg();
1651
48
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
1652
48
    if (isBaseWithConstantOffset(Root, MRI)) {
1653
30
      const MachineOperand &LHS = RootDef->getOperand(1);
1654
30
      const MachineOperand &RHS = RootDef->getOperand(2);
1655
30
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
1656
30
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
1657
30
      if (LHSDef && RHSDef) {
1658
30
        int64_t PossibleOffset =
1659
30
            RHSDef->getOperand(1).getCImm()->getSExtValue();
1660
30
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
1661
30
            (!STI.privateMemoryResourceIsRangeChecked() ||
1662
10
             signBitIsZero(LHS, MRI))) {
1663
5
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
1664
2
            FI = LHSDef->getOperand(1).getIndex();
1665
3
          else
1666
3
            VAddr = LHS.getReg();
1667
5
          Offset = PossibleOffset;
1668
5
        }
1669
30
      }
1670
30
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
1671
2
      FI = RootDef->getOperand(1).getIndex();
1672
2
    }
1673
48
  }
1674
48
1675
48
  // If we don't know this private access is a local stack object, it needs to
1676
48
  // be relative to the entry point's scratch wave offset register.
1677
48
  // TODO: Should split large offsets that don't fit like above.
1678
48
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
1679
48
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
1680
48
                                   : Info->getScratchWaveOffsetReg();
1681
48
1682
48
  return {{[=](MachineInstrBuilder &MIB) { // rsrc
1683
48
             MIB.addReg(Info->getScratchRSrcReg());
1684
48
           },
1685
48
           [=](MachineInstrBuilder &MIB) { // vaddr
1686
48
             if (FI.hasValue())
1687
4
               MIB.addFrameIndex(FI.getValue());
1688
44
             else
1689
44
               MIB.addReg(VAddr);
1690
48
           },
1691
48
           [=](MachineInstrBuilder &MIB) { // soffset
1692
48
             MIB.addReg(SOffset);
1693
48
           },
1694
48
           [=](MachineInstrBuilder &MIB) { // offset
1695
48
             MIB.addImm(Offset);
1696
48
           }}};
1697
48
}
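// Editor's sketch, not in the original source: the 4095 masks above reflect
// MUBUF's 12-bit immediate offset field. For a purely constant scratch
// address such as 8212, the code materializes the aligned high part,
//   %highbits = V_MOV_B32_e32 8192   ; 8212 & ~4095
// uses it as vaddr, and folds the low bits (8212 & 4095 = 20) into the
// offset field; for base-plus-constant addresses it instead folds a legal
// constant offset and uses the base register (or frame index) as vaddr.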
1698
1699
InstructionSelector::ComplexRendererFns
1700
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
1701
68
    MachineOperand &Root) const {
1702
68
  MachineInstr *MI = Root.getParent();
1703
68
  MachineBasicBlock *MBB = MI->getParent();
1704
68
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1705
68
1706
68
  int64_t Offset = 0;
1707
68
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
1708
68
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
1709
60
    return {};
1710
8
1711
8
  const MachineFunction *MF = MBB->getParent();
1712
8
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1713
8
  const MachineMemOperand *MMO = *MI->memoperands_begin();
1714
8
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
1715
8
1716
8
  Register SOffsetReg = isStackPtrRelative(PtrInfo)
1717
8
                            ? Info->getStackPtrOffsetReg()
1718
8
                            : Info->getScratchWaveOffsetReg();
1719
8
  return {{
1720
8
      [=](MachineInstrBuilder &MIB) {
1721
8
        MIB.addReg(Info->getScratchRSrcReg());
1722
8
      },                                                         // rsrc
1723
8
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
1724
8
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
1725
8
  }};
1726
8
}