Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Line | Count | Source
1
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//==-----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// Defines an instruction selector for the AMDGPU target.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "AMDGPU.h"
15
#include "AMDGPUArgumentUsageInfo.h"
16
#include "AMDGPUISelLowering.h" // For AMDGPUISD
17
#include "AMDGPUInstrInfo.h"
18
#include "AMDGPUPerfHintAnalysis.h"
19
#include "AMDGPURegisterInfo.h"
20
#include "AMDGPUSubtarget.h"
21
#include "AMDGPUTargetMachine.h"
22
#include "SIDefines.h"
23
#include "SIISelLowering.h"
24
#include "SIInstrInfo.h"
25
#include "SIMachineFunctionInfo.h"
26
#include "SIRegisterInfo.h"
27
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
28
#include "llvm/ADT/APInt.h"
29
#include "llvm/ADT/SmallVector.h"
30
#include "llvm/ADT/StringRef.h"
31
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
32
#include "llvm/Analysis/ValueTracking.h"
33
#include "llvm/CodeGen/FunctionLoweringInfo.h"
34
#include "llvm/CodeGen/ISDOpcodes.h"
35
#include "llvm/CodeGen/MachineFunction.h"
36
#include "llvm/CodeGen/MachineRegisterInfo.h"
37
#include "llvm/CodeGen/SelectionDAG.h"
38
#include "llvm/CodeGen/SelectionDAGISel.h"
39
#include "llvm/CodeGen/SelectionDAGNodes.h"
40
#include "llvm/CodeGen/ValueTypes.h"
41
#include "llvm/IR/BasicBlock.h"
42
#ifdef EXPENSIVE_CHECKS
43
#include "llvm/IR/Dominators.h"
44
#endif
45
#include "llvm/IR/Instruction.h"
46
#include "llvm/MC/MCInstrDesc.h"
47
#include "llvm/Support/Casting.h"
48
#include "llvm/Support/CodeGen.h"
49
#include "llvm/Support/ErrorHandling.h"
50
#include "llvm/Support/MachineValueType.h"
51
#include "llvm/Support/MathExtras.h"
52
#include <cassert>
53
#include <cstdint>
54
#include <new>
55
#include <vector>
56
57
#define DEBUG_TYPE "isel"
58
59
using namespace llvm;
60
61
namespace llvm {
62
63
class R600InstrInfo;
64
65
} // end namespace llvm
66
67
//===----------------------------------------------------------------------===//
68
// Instruction Selector Implementation
69
//===----------------------------------------------------------------------===//
70
71
namespace {
72
73
35
static bool isNullConstantOrUndef(SDValue V) {
74
35
  if (V.isUndef())
75
4
    return true;
76
31
77
31
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
78
31
  return Const != nullptr && Const->isNullValue();
79
31
}
80
81
1.22k
static bool getConstantValue(SDValue N, uint32_t &Out) {
82
1.22k
  // This is only used for packed vectors, where using 0 for undef should
83
1.22k
  // always be good.
84
1.22k
  if (N.isUndef()) {
85
21
    Out = 0;
86
21
    return true;
87
21
  }
88
1.20k
89
1.20k
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
90
288
    Out = C->getAPIntValue().getSExtValue();
91
288
    return true;
92
288
  }
93
912
94
912
  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
95
339
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
96
339
    return true;
97
339
  }
98
573
99
573
  return false;
100
573
}
101
102
// TODO: Handle undef as zero
103
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
104
863
                                 bool Negate = false) {
105
863
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
106
863
  uint32_t LHSVal, RHSVal;
107
863
  if (getConstantValue(N->getOperand(0), LHSVal) &&
108
863
      getConstantValue(N->getOperand(1), RHSVal)) {
109
290
    SDLoc SL(N);
110
290
    uint32_t K = Negate ?
111
38
      (-LHSVal & 0xffff) | (-RHSVal << 16) :
112
290
      (LHSVal & 0xffff) | (RHSVal << 16);
113
290
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
114
290
                              DAG.getTargetConstant(K, SL, MVT::i32));
115
290
  }
116
573
117
573
  return nullptr;
118
573
}
119
120
38
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
121
38
  return packConstantV2I16(N, DAG, true);
122
38
}
123
124
/// AMDGPU specific code to select AMDGPU machine instructions for
125
/// SelectionDAG operations.
126
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
127
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
128
  // make the right decision when generating code for different targets.
129
  const GCNSubtarget *Subtarget;
130
  bool EnableLateStructurizeCFG;
131
132
public:
133
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
134
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
135
2.68k
    : SelectionDAGISel(*TM, OptLevel) {
136
2.68k
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
137
2.68k
  }
138
2.66k
  ~AMDGPUDAGToDAGISel() override = default;
139
140
2.66k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
141
2.66k
    AU.addRequired<AMDGPUArgumentUsageInfo>();
142
2.66k
    AU.addRequired<LegacyDivergenceAnalysis>();
143
#ifdef EXPENSIVE_CHECKS
144
    AU.addRequired<DominatorTreeWrapperPass>();
145
    AU.addRequired<LoopInfoWrapperPass>();
146
#endif
147
    SelectionDAGISel::getAnalysisUsage(AU);
148
2.66k
  }
149
150
  bool matchLoadD16FromBuildVector(SDNode *N) const;
151
152
  bool runOnMachineFunction(MachineFunction &MF) override;
153
  void PreprocessISelDAG() override;
154
  void Select(SDNode *N) override;
155
  StringRef getPassName() const override;
156
  void PostprocessISelDAG() override;
157
158
protected:
159
  void SelectBuildVector(SDNode *N, unsigned RegClassID);
160
161
private:
162
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
163
  bool isNoNanSrc(SDValue N) const;
164
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
165
46
  bool isNegInlineImmediate(const SDNode *N) const {
166
46
    return isInlineImmediate(N, true);
167
46
  }
168
169
  bool isVGPRImm(const SDNode *N) const;
170
  bool isUniformLoad(const SDNode *N) const;
171
  bool isUniformBr(const SDNode *N) const;
172
173
  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
174
175
  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
176
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
177
178
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
179
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
180
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
181
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
182
                       unsigned OffsetBits) const;
183
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
184
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
185
                                 SDValue &Offset1) const;
186
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
187
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
188
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
189
                   SDValue &TFE, SDValue &DLC) const;
190
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
191
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
192
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
193
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
194
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
195
                         SDValue &SLC) const;
196
  bool SelectMUBUFScratchOffen(SDNode *Parent,
197
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
198
                               SDValue &SOffset, SDValue &ImmOffset) const;
199
  bool SelectMUBUFScratchOffset(SDNode *Parent,
200
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
201
                                SDValue &Offset) const;
202
203
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
204
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
205
                         SDValue &TFE, SDValue &DLC) const;
206
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
207
                         SDValue &Offset, SDValue &SLC) const;
208
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
209
                         SDValue &Offset) const;
210
211
  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
212
                        SDValue &Offset, SDValue &SLC) const;
213
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
214
                              SDValue &Offset, SDValue &SLC) const;
215
216
  template <bool IsSigned>
217
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
218
                        SDValue &Offset, SDValue &SLC) const;
219
220
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
221
                        bool &Imm) const;
222
  SDValue Expand32BitAddress(SDValue Addr) const;
223
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
224
                  bool &Imm) const;
225
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
226
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
227
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
228
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
229
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
230
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
231
232
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
233
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
234
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
235
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
236
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
237
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
238
                       SDValue &Clamp, SDValue &Omod) const;
239
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
240
                         SDValue &Clamp, SDValue &Omod) const;
241
242
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
243
                                 SDValue &Clamp,
244
                                 SDValue &Omod) const;
245
246
  bool SelectVOP3OMods(SDValue In, SDValue &Src,
247
                       SDValue &Clamp, SDValue &Omod) const;
248
249
  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
250
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
251
                        SDValue &Clamp) const;
252
253
  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
254
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
255
                        SDValue &Clamp) const;
256
257
  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
258
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
259
                            SDValue &Clamp) const;
260
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
261
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
262
263
  SDValue getHi16Elt(SDValue In) const;
264
265
  void SelectADD_SUB_I64(SDNode *N);
266
  void SelectAddcSubb(SDNode *N);
267
  void SelectUADDO_USUBO(SDNode *N);
268
  void SelectDIV_SCALE(SDNode *N);
269
  void SelectDIV_FMAS(SDNode *N);
270
  void SelectMAD_64_32(SDNode *N);
271
  void SelectFMA_W_CHAIN(SDNode *N);
272
  void SelectFMUL_W_CHAIN(SDNode *N);
273
274
  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
275
                   uint32_t Offset, uint32_t Width);
276
  void SelectS_BFEFromShifts(SDNode *N);
277
  void SelectS_BFE(SDNode *N);
278
  bool isCBranchSCC(const SDNode *N) const;
279
  void SelectBRCOND(SDNode *N);
280
  void SelectFMAD_FMA(SDNode *N);
281
  void SelectATOMIC_CMP_SWAP(SDNode *N);
282
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
283
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
284
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
285
  void SelectINTRINSIC_VOID(SDNode *N);
286
287
protected:
288
  // Include the pieces autogenerated from the target description.
289
#include "AMDGPUGenDAGISel.inc"
290
};
291
292
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
293
  const R600Subtarget *Subtarget;
294
295
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
296
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
297
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
298
                                       SDValue& Offset);
299
public:
300
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
301
280
      AMDGPUDAGToDAGISel(TM, OptLevel) {}
302
303
  void Select(SDNode *N) override;
304
305
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
306
                          SDValue &Offset) override;
307
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
308
                          SDValue &Offset) override;
309
310
  bool runOnMachineFunction(MachineFunction &MF) override;
311
312
2.47k
  void PreprocessISelDAG() override {}
313
314
protected:
315
  // Include the pieces autogenerated from the target description.
316
#include "R600GenDAGISel.inc"
317
};
318
319
4.38k
static SDValue stripBitcast(SDValue Val) {
320
4.38k
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
321
4.38k
}
322
323
// Figure out if this is really an extract of the high 16-bits of a dword.
324
1.19k
static bool isExtractHiElt(SDValue In, SDValue &Out) {
325
1.19k
  In = stripBitcast(In);
326
1.19k
  if (In.getOpcode() != ISD::TRUNCATE)
327
709
    return false;
328
481
329
481
  SDValue Srl = In.getOperand(0);
330
481
  if (Srl.getOpcode() == ISD::SRL) {
331
232
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
332
232
      if (ShiftAmt->getZExtValue() == 16) {
333
208
        Out = stripBitcast(Srl.getOperand(0));
334
208
        return true;
335
208
      }
336
273
    }
337
232
  }
338
273
339
273
  return false;
340
273
}
341
342
// Look through operations that obscure just looking at the low 16-bits of the
343
// same register.
344
730
static SDValue stripExtractLoElt(SDValue In) {
345
730
  if (In.getOpcode() == ISD::TRUNCATE) {
346
158
    SDValue Src = In.getOperand(0);
347
158
    if (Src.getValueType().getSizeInBits() == 32)
348
157
      return stripBitcast(Src);
349
573
  }
350
573
351
573
  return In;
352
573
}
353
354
}  // end anonymous namespace
355
356
101k
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
357
101k
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
358
101k
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
359
101k
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
360
101k
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
361
#ifdef EXPENSIVE_CHECKS
362
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
363
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
364
#endif
365
101k
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
366
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
367
368
/// This pass converts a legalized DAG into an AMDGPU-specific
369
// DAG, ready for instruction scheduling.
370
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
371
2.40k
                                        CodeGenOpt::Level OptLevel) {
372
2.40k
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
373
2.40k
}
374
375
/// This pass converts a legalized DAG into an R600-specific
376
// DAG, ready for instruction scheduling.
377
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
378
280
                                      CodeGenOpt::Level OptLevel) {
379
280
  return new R600DAGToDAGISel(TM, OptLevel);
380
280
}
381
382
25.1k
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
383
#ifdef EXPENSIVE_CHECKS
384
  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
385
  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
386
  for (auto &L : LI->getLoopsInPreorder()) {
387
    assert(L->isLCSSAForm(DT));
388
  }
389
#endif
390
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
391
25.1k
  return SelectionDAGISel::runOnMachineFunction(MF);
392
25.1k
}
393
394
7.19k
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
395
7.19k
  assert(Subtarget->d16PreservesUnusedBits());
396
7.19k
  MVT VT = N->getValueType(0).getSimpleVT();
397
7.19k
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
398
6.31k
    return false;
399
878
400
878
  SDValue Lo = N->getOperand(0);
401
878
  SDValue Hi = N->getOperand(1);
402
878
403
878
  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
404
878
405
878
  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
406
878
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
407
878
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
408
878
409
878
  // Need to check for possible indirect dependencies on the other half of the
410
878
  // vector to avoid introducing a cycle.
411
878
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
412
78
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
413
78
414
78
    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
415
78
    SDValue Ops[] = {
416
78
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
417
78
    };
418
78
419
78
    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
420
78
    if (LdHi->getMemoryVT() == MVT::i8) {
421
24
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
422
13
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
423
54
    } else {
424
54
      assert(LdHi->getMemoryVT() == MVT::i16);
425
54
    }
426
78
427
78
    SDValue NewLoadHi =
428
78
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
429
78
                                  Ops, LdHi->getMemoryVT(),
430
78
                                  LdHi->getMemOperand());
431
78
432
78
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
433
78
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
434
78
    return true;
435
78
  }
436
800
437
800
  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
438
800
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
439
800
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
440
800
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
441
800
  if (LdLo && Lo.hasOneUse()) {
442
64
    SDValue TiedIn = getHi16Elt(Hi);
443
64
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
444
15
      return false;
445
49
446
49
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
447
49
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
448
49
    if (LdLo->getMemoryVT() == MVT::i8) {
449
21
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
450
11
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
451
28
    } else {
452
28
      assert(LdLo->getMemoryVT() == MVT::i16);
453
28
    }
454
49
455
49
    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
456
49
457
49
    SDValue Ops[] = {
458
49
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
459
49
    };
460
49
461
49
    SDValue NewLoadLo =
462
49
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
463
49
                                  Ops, LdLo->getMemoryVT(),
464
49
                                  LdLo->getMemOperand());
465
49
466
49
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
467
49
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
468
49
    return true;
469
49
  }
470
736
471
736
  return false;
472
736
}
473
474
28.4k
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
475
28.4k
  if (!Subtarget->d16PreservesUnusedBits())
476
20.9k
    return;
477
7.44k
478
7.44k
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
479
7.44k
480
7.44k
  bool MadeChange = false;
481
231k
  while (Position != CurDAG->allnodes_begin()) {
482
223k
    SDNode *N = &*--Position;
483
223k
    if (N->use_empty())
484
7.44k
      continue;
485
216k
486
216k
    switch (N->getOpcode()) {
487
216k
    case ISD::BUILD_VECTOR:
488
7.19k
      MadeChange |= matchLoadD16FromBuildVector(N);
489
7.19k
      break;
490
216k
    default:
491
209k
      break;
492
216k
    }
493
216k
  }
494
7.44k
495
7.44k
  if (MadeChange) {
496
122
    CurDAG->RemoveDeadNodes();
497
122
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
498
122
               CurDAG->dump(););
499
122
  }
500
7.44k
}
501
502
246
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
503
246
  if (TM.Options.NoNaNsFPMath)
504
189
    return true;
505
57
506
57
  // TODO: Move into isKnownNeverNaN
507
57
  if (N->getFlags().isDefined())
508
57
    return N->getFlags().hasNoNaNs();
509
0
510
0
  return CurDAG->isKnownNeverNaN(N);
511
0
}
512
513
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
514
9.24k
                                           bool Negated) const {
515
9.24k
  if (N->isUndef())
516
0
    return true;
517
9.24k
518
9.24k
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
519
9.24k
  if (Negated) {
520
46
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
521
42
      return TII->isInlineConstant(-C->getAPIntValue());
522
4
523
4
    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
524
0
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
525
9.20k
526
9.20k
  } else {
527
9.20k
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
528
8.03k
      return TII->isInlineConstant(C->getAPIntValue());
529
1.16k
530
1.16k
    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
531
594
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
532
579
  }
533
579
534
579
  return false;
535
579
}
536
537
/// Determine the register class for \p OpNo
538
/// \returns The register class of the virtual register that will be used for
539
/// the given operand number \OpNo or NULL if the register class cannot be
540
/// determined.
541
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
542
32.8k
                                                          unsigned OpNo) const {
543
32.8k
  if (!N->isMachineOpcode()) {
544
1.16k
    if (N->getOpcode() == ISD::CopyToReg) {
545
1.16k
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
546
1.16k
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
547
449
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
548
449
        return MRI.getRegClass(Reg);
549
449
      }
550
720
551
720
      const SIRegisterInfo *TRI
552
720
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
553
720
      return TRI->getPhysRegClass(Reg);
554
720
    }
555
0
556
0
    return nullptr;
557
0
  }
558
31.6k
559
31.6k
  switch (N->getMachineOpcode()) {
560
31.6k
  default: {
561
29.5k
    const MCInstrDesc &Desc =
562
29.5k
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
563
29.5k
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
564
29.5k
    if (OpIdx >= Desc.getNumOperands())
565
0
      return nullptr;
566
29.5k
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
567
29.5k
    if (RegClass == -1)
568
12
      return nullptr;
569
29.5k
570
29.5k
    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
571
29.5k
  }
572
29.5k
  case AMDGPU::REG_SEQUENCE: {
573
2.12k
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
574
2.12k
    const TargetRegisterClass *SuperRC =
575
2.12k
        Subtarget->getRegisterInfo()->getRegClass(RCID);
576
2.12k
577
2.12k
    SDValue SubRegOp = N->getOperand(OpNo + 1);
578
2.12k
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
579
2.12k
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
580
2.12k
                                                              SubRegIdx);
581
29.5k
  }
582
31.6k
  }
583
31.6k
}
584
585
9.09k
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
586
9.09k
  const SITargetLowering& Lowering =
587
9.09k
    *static_cast<const SITargetLowering*>(getTargetLowering());
588
9.09k
589
9.09k
  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
590
9.09k
591
9.09k
  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
592
9.09k
                                 Val);
593
9.09k
594
9.09k
  SDValue Glue = M0.getValue(1);
595
9.09k
596
9.09k
  SmallVector <SDValue, 8> Ops;
597
9.09k
  Ops.push_back(M0); // Replace the chain.
598
32.2k
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
599
23.1k
    Ops.push_back(N->getOperand(i));
600
9.09k
601
9.09k
  Ops.push_back(Glue);
602
9.09k
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
603
9.09k
}
604
605
93.2k
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
606
93.2k
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
607
93.2k
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
608
12.3k
    if (Subtarget->ldsRequiresM0Init())
609
8.79k
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
610
80.9k
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
611
48
    MachineFunction &MF = CurDAG->getMachineFunction();
612
48
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
613
48
    return
614
48
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
615
48
  }
616
84.4k
  return N;
617
84.4k
}
618
619
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
620
1.47k
                                                  EVT VT) const {
621
1.47k
  SDNode *Lo = CurDAG->getMachineNode(
622
1.47k
      AMDGPU::S_MOV_B32, DL, MVT::i32,
623
1.47k
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
624
1.47k
  SDNode *Hi =
625
1.47k
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
626
1.47k
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
627
1.47k
  const SDValue Ops[] = {
628
1.47k
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
629
1.47k
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
630
1.47k
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
631
1.47k
632
1.47k
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
633
1.47k
}
634
635
28.1k
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
636
28.1k
  switch (NumVectorElts) {
637
28.1k
  case 1:
638
0
    return AMDGPU::SReg_32_XM0RegClassID;
639
28.1k
  case 2:
640
17.6k
    return AMDGPU::SReg_64RegClassID;
641
28.1k
  case 3:
642
114
    return AMDGPU::SGPR_96RegClassID;
643
28.1k
  case 4:
644
9.04k
    return AMDGPU::SReg_128RegClassID;
645
28.1k
  case 5:
646
1
    return AMDGPU::SGPR_160RegClassID;
647
28.1k
  case 8:
648
1.24k
    return AMDGPU::SReg_256RegClassID;
649
28.1k
  case 16:
650
123
    return AMDGPU::SReg_512RegClassID;
651
28.1k
  case 32:
652
23
    return AMDGPU::SReg_1024RegClassID;
653
0
  }
654
0
655
0
  llvm_unreachable("invalid vector size");
656
0
}
657
658
30.6k
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
659
30.6k
  EVT VT = N->getValueType(0);
660
30.6k
  unsigned NumVectorElts = VT.getVectorNumElements();
661
30.6k
  EVT EltVT = VT.getVectorElementType();
662
30.6k
  SDLoc DL(N);
663
30.6k
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
664
30.6k
665
30.6k
  if (NumVectorElts == 1) {
666
0
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
667
0
                         RegClass);
668
0
    return;
669
0
  }
670
30.6k
671
30.6k
  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
672
30.6k
                                  "supported yet");
673
30.6k
  // 32 = Max Num Vector Elements
674
30.6k
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
675
30.6k
  // 1 = Vector Register Class
676
30.6k
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
677
30.6k
678
30.6k
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
679
30.6k
  bool IsRegSeq = true;
680
30.6k
  unsigned NOps = N->getNumOperands();
681
124k
  for (unsigned i = 0; i < NOps; 
i++93.5k
) {
682
93.5k
    // XXX: Why is this here?
683
93.5k
    if (isa<RegisterSDNode>(N->getOperand(i))) {
684
0
      IsRegSeq = false;
685
0
      break;
686
0
    }
687
93.5k
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
688
93.5k
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
689
93.5k
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
690
93.5k
  }
691
30.6k
  if (NOps != NumVectorElts) {
692
5
    // Fill in the missing undef elements if this was a scalar_to_vector.
693
5
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
694
5
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
695
5
                                                   DL, EltVT);
696
10
    for (unsigned i = NOps; i < NumVectorElts; 
++i5
) {
697
5
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
698
5
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
699
5
      RegSeqArgs[1 + (2 * i) + 1] =
700
5
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
701
5
    }
702
5
  }
703
30.6k
704
30.6k
  if (!IsRegSeq)
705
0
    SelectCode(N);
706
30.6k
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
707
30.6k
}
708
709
700k
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
710
700k
  unsigned int Opc = N->getOpcode();
711
700k
  if (N->isMachineOpcode()) {
712
2.95k
    N->setNodeId(-1);
713
2.95k
    return;   // Already selected.
714
2.95k
  }
715
697k
716
697k
  if (isa<AtomicSDNode>(N) ||
717
697k
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
718
695k
       Opc == ISD::ATOMIC_LOAD_FADD ||
719
695k
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
720
695k
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
721
2.70k
    N = glueCopyToM0LDSInit(N);
722
697k
723
697k
  switch (Opc) {
724
697k
  default:
725
473k
    break;
726
697k
  // We are selecting i64 ADD here instead of custom lower it during
727
697k
  // DAG legalization, so we can fold some i64 ADDs used for address
728
697k
  // calculation into the LOAD and STORE instructions.
729
697k
  case ISD::ADDC:
730
288
  case ISD::ADDE:
731
288
  case ISD::SUBC:
732
288
  case ISD::SUBE: {
733
288
    if (N->getValueType(0) != MVT::i64)
734
138
      break;
735
150
736
150
    SelectADD_SUB_I64(N);
737
150
    return;
738
150
  }
739
389
  case ISD::ADDCARRY:
740
389
  case ISD::SUBCARRY:
741
389
    if (N->getValueType(0) != MVT::i32)
742
0
      break;
743
389
744
389
    SelectAddcSubb(N);
745
389
    return;
746
389
  case ISD::UADDO:
747
259
  case ISD::USUBO: {
748
259
    SelectUADDO_USUBO(N);
749
259
    return;
750
259
  }
751
259
  case AMDGPUISD::FMUL_W_CHAIN: {
752
64
    SelectFMUL_W_CHAIN(N);
753
64
    return;
754
259
  }
755
320
  case AMDGPUISD::FMA_W_CHAIN: {
756
320
    SelectFMA_W_CHAIN(N);
757
320
    return;
758
259
  }
759
259
760
29.0k
  case ISD::SCALAR_TO_VECTOR:
761
29.0k
  case ISD::BUILD_VECTOR: {
762
29.0k
    EVT VT = N->getValueType(0);
763
29.0k
    unsigned NumVectorElts = VT.getVectorNumElements();
764
29.0k
    if (VT.getScalarSizeInBits() == 16) {
765
896
      if (Opc == ISD::BUILD_VECTOR && 
NumVectorElts == 2825
) {
766
825
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
767
252
          ReplaceNode(N, Packed);
768
252
          return;
769
252
        }
770
644
      }
771
644
772
644
      break;
773
644
    }
774
28.1k
775
28.1k
    assert(VT.getVectorElementType().bitsEq(MVT::i32));
776
28.1k
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
777
28.1k
    SelectBuildVector(N, RegClassID);
778
28.1k
    return;
779
28.1k
  }
780
28.1k
  case ISD::BUILD_PAIR: {
781
11.7k
    SDValue RC, SubReg0, SubReg1;
782
11.7k
    SDLoc DL(N);
783
11.7k
    if (N->getValueType(0) == MVT::i128) {
784
0
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
785
0
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
786
0
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
787
11.7k
    } else if (N->getValueType(0) == MVT::i64) {
788
11.7k
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
789
11.7k
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
790
11.7k
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
791
11.7k
    } else {
792
0
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
793
0
    }
794
11.7k
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
795
11.7k
                            N->getOperand(1), SubReg1 };
796
11.7k
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
797
11.7k
                                          N->getValueType(0), Ops));
798
11.7k
    return;
799
11.7k
  }
800
11.7k
801
34.4k
  case ISD::Constant:
802
34.4k
  case ISD::ConstantFP: {
803
34.4k
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
804
33.0k
      break;
805
1.39k
806
1.39k
    uint64_t Imm;
807
1.39k
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
808
77
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
809
1.31k
    else {
810
1.31k
      ConstantSDNode *C = cast<ConstantSDNode>(N);
811
1.31k
      Imm = C->getZExtValue();
812
1.31k
    }
813
1.39k
814
1.39k
    SDLoc DL(N);
815
1.39k
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
816
1.39k
    return;
817
1.39k
  }
818
90.5k
  case ISD::LOAD:
819
90.5k
  case ISD::STORE:
820
90.5k
  case ISD::ATOMIC_LOAD:
821
90.5k
  case ISD::ATOMIC_STORE: {
822
90.5k
    N = glueCopyToM0LDSInit(N);
823
90.5k
    break;
824
90.5k
  }
825
90.5k
826
90.5k
  case AMDGPUISD::BFE_I32:
827
152
  case AMDGPUISD::BFE_U32: {
828
152
    // There is a scalar version available, but unlike the vector version which
829
152
    // has a separate operand for the offset and width, the scalar version packs
830
152
    // the width and offset into a single operand. Try to move to the scalar
831
152
    // version if the offsets are constant, so that we can try to keep extended
832
152
    // loads of kernel arguments in SGPRs.
833
152
834
152
    // TODO: Technically we could try to pattern match scalar bitshifts of
835
152
    // dynamic values, but it's probably not useful.
836
152
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
837
152
    if (!Offset)
838
16
      break;
839
136
840
136
    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
841
136
    if (!Width)
842
4
      break;
843
132
844
132
    bool Signed = Opc == AMDGPUISD::BFE_I32;
845
132
846
132
    uint32_t OffsetVal = Offset->getZExtValue();
847
132
    uint32_t WidthVal = Width->getZExtValue();
848
132
849
132
    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
850
132
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
851
132
    return;
852
132
  }
853
319
  case AMDGPUISD::DIV_SCALE: {
854
319
    SelectDIV_SCALE(N);
855
319
    return;
856
132
  }
857
172
  case AMDGPUISD::DIV_FMAS: {
858
172
    SelectDIV_FMAS(N);
859
172
    return;
860
132
  }
861
132
  case AMDGPUISD::MAD_I64_I32:
862
48
  case AMDGPUISD::MAD_U64_U32: {
863
48
    SelectMAD_64_32(N);
864
48
    return;
865
48
  }
866
22.0k
  case ISD::CopyToReg: {
867
22.0k
    const SITargetLowering& Lowering =
868
22.0k
      *static_cast<const SITargetLowering*>(getTargetLowering());
869
22.0k
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
870
22.0k
    break;
871
48
  }
872
29.7k
  case ISD::AND:
873
29.7k
  case ISD::SRL:
874
29.7k
  case ISD::SRA:
875
29.7k
  case ISD::SIGN_EXTEND_INREG:
876
29.7k
    if (N->getValueType(0) != MVT::i32)
877
7.37k
      break;
878
22.3k
879
22.3k
    SelectS_BFE(N);
880
22.3k
    return;
881
22.3k
  case ISD::BRCOND:
882
692
    SelectBRCOND(N);
883
692
    return;
884
22.3k
  case ISD::FMAD:
885
2.78k
  case ISD::FMA:
886
2.78k
    SelectFMAD_FMA(N);
887
2.78k
    return;
888
2.78k
  case AMDGPUISD::ATOMIC_CMP_SWAP:
889
690
    SelectATOMIC_CMP_SWAP(N);
890
690
    return;
891
2.78k
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
892
187
  case AMDGPUISD::CVT_PKNORM_I16_F32:
893
187
  case AMDGPUISD::CVT_PKNORM_U16_F32:
894
187
  case AMDGPUISD::CVT_PK_U16_U32:
895
187
  case AMDGPUISD::CVT_PK_I16_I32: {
896
187
    // Hack around using a legal type if f16 is illegal.
897
187
    if (N->getValueType(0) == MVT::i32) {
898
84
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
899
84
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
900
84
                              { N->getOperand(0), N->getOperand(1) });
901
84
      SelectCode(N);
902
84
      return;
903
84
    }
904
103
905
103
    break;
906
103
  }
907
103
  case ISD::INTRINSIC_W_CHAIN: {
908
85
    SelectINTRINSIC_W_CHAIN(N);
909
85
    return;
910
103
  }
911
571
  case ISD::INTRINSIC_VOID: {
912
571
    SelectINTRINSIC_VOID(N);
913
571
    return;
914
627k
  }
915
627k
  }
916
627k
917
627k
  SelectCode(N);
918
627k
}
919
920
441
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
921
441
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
922
441
  const Instruction *Term = BB->getTerminator();
923
441
  return Term->getMetadata("amdgpu.uniform") ||
924
441
         Term->getMetadata("structurizecfg.uniform");
925
441
}
926
927
27.4k
StringRef AMDGPUDAGToDAGISel::getPassName() const {
928
27.4k
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
929
27.4k
}
930
931
//===----------------------------------------------------------------------===//
932
// Complex Patterns
933
//===----------------------------------------------------------------------===//
934
935
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
936
0
                                            SDValue &Offset) {
937
0
  return false;
938
0
}
939
940
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
941
0
                                            SDValue &Offset) {
942
0
  ConstantSDNode *C;
943
0
  SDLoc DL(Addr);
944
0
945
0
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
946
0
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
947
0
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
948
0
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
949
0
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
950
0
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
951
0
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
952
0
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
953
0
            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
954
0
    Base = Addr.getOperand(0);
955
0
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
956
0
  } else {
957
0
    Base = Addr;
958
0
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
959
0
  }
960
0
961
0
  return true;
962
0
}
963
964
// FIXME: Should only handle addcarry/subcarry
965
150
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
966
150
  SDLoc DL(N);
967
150
  SDValue LHS = N->getOperand(0);
968
150
  SDValue RHS = N->getOperand(1);
969
150
970
150
  unsigned Opcode = N->getOpcode();
971
150
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
972
150
  bool ProduceCarry =
973
150
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
974
150
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
975
150
976
150
  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
977
150
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
978
150
979
150
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
980
150
                                       DL, MVT::i32, LHS, Sub0);
981
150
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
982
150
                                       DL, MVT::i32, LHS, Sub1);
983
150
984
150
  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
985
150
                                       DL, MVT::i32, RHS, Sub0);
986
150
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
987
150
                                       DL, MVT::i32, RHS, Sub1);
988
150
989
150
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
990
150
991
150
  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
992
150
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
993
150
994
150
  SDNode *AddLo;
995
150
  if (!ConsumeCarry) {
996
144
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
997
144
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
998
144
  } else {
999
6
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
1000
6
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1001
6
  }
1002
150
  SDValue AddHiArgs[] = {
1003
150
    SDValue(Hi0, 0),
1004
150
    SDValue(Hi1, 0),
1005
150
    SDValue(AddLo, 1)
1006
150
  };
1007
150
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1008
150
1009
150
  SDValue RegSequenceArgs[] = {
1010
150
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1011
150
    SDValue(AddLo,0),
1012
150
    Sub0,
1013
150
    SDValue(AddHi,0),
1014
150
    Sub1,
1015
150
  };
1016
150
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1017
150
                                               MVT::i64, RegSequenceArgs);
1018
150
1019
150
  if (ProduceCarry) {
1020
150
    // Replace the carry-use
1021
150
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
1022
150
  }
1023
150
1024
150
  // Replace the remaining uses.
1025
150
  ReplaceNode(N, RegSequence);
1026
150
}
1027
1028
389
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1029
389
  SDLoc DL(N);
1030
389
  SDValue LHS = N->getOperand(0);
1031
389
  SDValue RHS = N->getOperand(1);
1032
389
  SDValue CI = N->getOperand(2);
1033
389
1034
389
  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1035
389
                                                 : AMDGPU::V_SUBB_U32_e64;
1036
389
  CurDAG->SelectNodeTo(
1037
389
      N, Opc, N->getVTList(),
1038
389
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1039
389
}
1040
1041
259
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1042
259
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
1043
259
  // carry out despite the _i32 name. These were renamed in VI to _U32.
1044
259
  // FIXME: We should probably rename the opcodes here.
1045
259
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
1046
146
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
1047
259
1048
259
  CurDAG->SelectNodeTo(
1049
259
      N, Opc, N->getVTList(),
1050
259
      {N->getOperand(0), N->getOperand(1),
1051
259
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1052
259
}
1053
1054
320
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1055
320
  SDLoc SL(N);
1056
320
  //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
1057
320
  SDValue Ops[10];
1058
320
1059
320
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1060
320
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1061
320
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1062
320
  Ops[8] = N->getOperand(0);
1063
320
  Ops[9] = N->getOperand(4);
1064
320
1065
320
  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
1066
320
}
1067
1068
64
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1069
64
  SDLoc SL(N);
1070
64
  //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
1071
64
  SDValue Ops[8];
1072
64
1073
64
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1074
64
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1075
64
  Ops[6] = N->getOperand(0);
1076
64
  Ops[7] = N->getOperand(3);
1077
64
1078
64
  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1079
64
}
1080
1081
// We need to handle this here because tablegen doesn't support matching
1082
// instructions with multiple outputs.
1083
319
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1084
319
  SDLoc SL(N);
1085
319
  EVT VT = N->getValueType(0);
1086
319
1087
319
  assert(VT == MVT::f32 || VT == MVT::f64);
1088
319
1089
319
  unsigned Opc
1090
319
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
1091
319
1092
319
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
1093
319
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1094
319
}
1095
1096
172
void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
1097
172
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1098
172
  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1099
172
1100
172
  SDLoc SL(N);
1101
172
  EVT VT = N->getValueType(0);
1102
172
1103
172
  assert(VT == MVT::f32 || VT == MVT::f64);
1104
172
1105
172
  unsigned Opc
1106
172
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
1107
172
1108
172
  SDValue CarryIn = N->getOperand(3);
1109
172
  // V_DIV_FMAS implicitly reads VCC.
1110
172
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
1111
172
                                     TRI->getVCC(), CarryIn, SDValue());
1112
172
1113
172
  SDValue Ops[10];
1114
172
1115
172
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1116
172
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
1117
172
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
1118
172
1119
172
  Ops[8] = VCC;
1120
172
  Ops[9] = VCC.getValue(1);
1121
172
1122
172
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1123
172
}
1124
1125
// We need to handle this here because tablegen doesn't support matching
1126
// instructions with multiple outputs.
1127
48
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1128
48
  SDLoc SL(N);
1129
48
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1130
48
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
1131
48
1132
48
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1133
48
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1134
48
                    Clamp };
1135
48
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1136
48
}
1137
1138
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
1139
8.51k
                                         unsigned OffsetBits) const {
1140
8.51k
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
1141
8.51k
      (OffsetBits == 8 && !isUInt<8>(Offset)))
1142
437
    return false;
1143
8.07k
1144
8.07k
  if (Subtarget->hasUsableDSOffset() ||
1145
8.07k
      Subtarget->unsafeDSOffsetFoldingEnabled())
1146
6.04k
    return true;
1147
2.03k
1148
2.03k
  // On Southern Islands instruction with a negative base value and an offset
1149
2.03k
  // don't seem to work.
1150
2.03k
  return CurDAG->SignBitIsZero(Base);
1151
2.03k
}
1152
1153
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1154
11.7k
                                              SDValue &Offset) const {
1155
11.7k
  SDLoc DL(Addr);
1156
11.7k
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1157
7.97k
    SDValue N0 = Addr.getOperand(0);
1158
7.97k
    SDValue N1 = Addr.getOperand(1);
1159
7.97k
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1160
7.97k
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
1161
7.52k
      // (add n0, c0)
1162
7.52k
      Base = N0;
1163
7.52k
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1164
7.52k
      return true;
1165
7.52k
    }
1166
3.80k
  } else if (Addr.getOpcode() == ISD::SUB) {
1167
16
    // sub C, x -> add (sub 0, x), C
1168
16
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1169
16
      int64_t ByteOffset = C->getSExtValue();
1170
16
      if (isUInt<16>(ByteOffset)) {
1171
14
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1172
14
1173
14
        // XXX - This is kind of hacky. Create a dummy sub node so we can check
1174
14
        // the known bits in isDSOffsetLegal. We need to emit the selected node
1175
14
        // here, so this is thrown away.
1176
14
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1177
14
                                      Zero, Addr.getOperand(1));
1178
14
1179
14
        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
1180
12
          SmallVector<SDValue, 3> Opnds;
1181
12
          Opnds.push_back(Zero);
1182
12
          Opnds.push_back(Addr.getOperand(1));
1183
12
1184
12
          // FIXME: Select to VOP3 version for with-carry.
1185
12
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1186
12
          if (Subtarget->hasAddNoCarry()) {
1187
5
            SubOp = AMDGPU::V_SUB_U32_e64;
1188
5
            Opnds.push_back(
1189
5
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1190
5
          }
1191
12
1192
12
          MachineSDNode *MachineSub =
1193
12
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1194
12
1195
12
          Base = SDValue(MachineSub, 0);
1196
12
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1197
12
          return true;
1198
12
        }
1199
3.78k
      }
1200
16
    }
1201
3.78k
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1202
720
    // If we have a constant address, prefer to put the constant into the
1203
720
    // offset. This can save moves to load the constant address since multiple
1204
720
    // operations can share the zero base address register, and enables merging
1205
720
    // into read2 / write2 instructions.
1206
720
1207
720
    SDLoc DL(Addr);
1208
720
1209
720
    if (isUInt<16>(CAddr->getZExtValue())) {
1210
716
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1211
716
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1212
716
                                 DL, MVT::i32, Zero);
1213
716
      Base = SDValue(MovZero, 0);
1214
716
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1215
716
      return true;
1216
716
    }
1217
3.52k
  }
1218
3.52k
1219
3.52k
  // default case
1220
3.52k
  Base = Addr;
1221
3.52k
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1222
3.52k
  return true;
1223
3.52k
}
1224
1225
// TODO: If offset is too big, put low 16-bit into offset.
1226
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1227
                                                   SDValue &Offset0,
1228
595
                                                   SDValue &Offset1) const {
1229
595
  SDLoc DL(Addr);
1230
595
1231
595
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1232
477
    SDValue N0 = Addr.getOperand(0);
1233
477
    SDValue N1 = Addr.getOperand(1);
1234
477
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1235
477
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
1236
477
    unsigned DWordOffset1 = DWordOffset0 + 1;
1237
477
    // (add n0, c0)
1238
477
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
1239
455
      Base = N0;
1240
455
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1241
455
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1242
455
      return true;
1243
455
    }
1244
118
  } else if (Addr.getOpcode() == ISD::SUB) {
1245
6
    // sub C, x -> add (sub 0, x), C
1246
6
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1247
6
      unsigned DWordOffset0 = C->getZExtValue() / 4;
1248
6
      unsigned DWordOffset1 = DWordOffset0 + 1;
1249
6
1250
6
      if (isUInt<8>(DWordOffset0)) {
1251
6
        SDLoc DL(Addr);
1252
6
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1253
6
1254
6
        // XXX - This is kind of hacky. Create a dummy sub node so we can check
1255
6
        // the known bits in isDSOffsetLegal. We need to emit the selected node
1256
6
        // here, so this is thrown away.
1257
6
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1258
6
                                      Zero, Addr.getOperand(1));
1259
6
1260
6
        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
1261
4
          SmallVector<SDValue, 3> Opnds;
1262
4
          Opnds.push_back(Zero);
1263
4
          Opnds.push_back(Addr.getOperand(1));
1264
4
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1265
4
          if (Subtarget->hasAddNoCarry()) {
1266
2
            SubOp = AMDGPU::V_SUB_U32_e64;
1267
2
            Opnds.push_back(
1268
2
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1269
2
          }
1270
4
1271
4
          MachineSDNode *MachineSub
1272
4
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1273
4
1274
4
          Base = SDValue(MachineSub, 0);
1275
4
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1276
4
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1277
4
          return true;
1278
4
        }
1279
112
      }
1280
6
    }
1281
112
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1282
0
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
1283
0
    unsigned DWordOffset1 = DWordOffset0 + 1;
1284
0
    assert(4 * DWordOffset0 == CAddr->getZExtValue());
1285
0
1286
0
    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
1287
0
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1288
0
      MachineSDNode *MovZero
1289
0
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1290
0
                                 DL, MVT::i32, Zero);
1291
0
      Base = SDValue(MovZero, 0);
1292
0
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1293
0
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1294
0
      return true;
1295
0
    }
1296
136
  }
1297
136
1298
136
  // default case
1299
136
1300
136
  Base = Addr;
1301
136
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1302
136
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1303
136
  return true;
1304
136
}
1305
1306
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
1307
                                     SDValue &VAddr, SDValue &SOffset,
1308
                                     SDValue &Offset, SDValue &Offen,
1309
                                     SDValue &Idxen, SDValue &Addr64,
1310
                                     SDValue &GLC, SDValue &SLC,
1311
50.6k
                                     SDValue &TFE, SDValue &DLC) const {
1312
50.6k
  // Subtarget prefers to use flat instruction
1313
50.6k
  if (Subtarget->useFlatForGlobal())
1314
17.8k
    return false;
1315
32.8k
1316
32.8k
  SDLoc DL(Addr);
1317
32.8k
1318
32.8k
  if (!GLC.getNode())
1319
32.8k
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1320
32.8k
  if (!SLC.getNode())
1321
32.5k
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1322
32.8k
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1323
32.8k
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1324
32.8k
1325
32.8k
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1326
32.8k
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1327
32.8k
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1328
32.8k
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1329
32.8k
1330
32.8k
  ConstantSDNode *C1 = nullptr;
1331
32.8k
  SDValue N0 = Addr;
1332
32.8k
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1333
9.18k
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1334
9.18k
    if (isUInt<32>(C1->getZExtValue()))
1335
9.17k
      N0 = Addr.getOperand(0);
1336
10
    else
1337
10
      C1 = nullptr;
1338
9.18k
  }
1339
32.8k
1340
32.8k
  if (N0.getOpcode() == ISD::ADD) {
1341
4.79k
    // (add N2, N3) -> addr64, or
1342
4.79k
    // (add (add N2, N3), C1) -> addr64
1343
4.79k
    SDValue N2 = N0.getOperand(0);
1344
4.79k
    SDValue N3 = N0.getOperand(1);
1345
4.79k
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1346
4.79k
1347
4.79k
    if (N2->isDivergent()) {
1348
152
      if (N3->isDivergent()) {
1349
7
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1350
7
        // addr64, and construct the resource from a 0 address.
1351
7
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1352
7
        VAddr = N0;
1353
145
      } else {
1354
145
        // N2 is divergent, N3 is not.
1355
145
        Ptr = N3;
1356
145
        VAddr = N2;
1357
145
      }
1358
4.64k
    } else {
1359
4.64k
      // N2 is not divergent.
1360
4.64k
      Ptr = N2;
1361
4.64k
      VAddr = N3;
1362
4.64k
    }
1363
4.79k
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1364
28.0k
  } else if (N0->isDivergent()) {
1365
73
    // N0 is divergent. Use it as the addr64, and construct the resource from a
1366
73
    // 0 address.
1367
73
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1368
73
    VAddr = N0;
1369
73
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1370
27.9k
  } else {
1371
27.9k
    // N0 -> offset, or
1372
27.9k
    // (N0 + C1) -> offset
1373
27.9k
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1374
27.9k
    Ptr = N0;
1375
27.9k
  }
1376
32.8k
1377
32.8k
  if (!C1) {
1378
23.6k
    // No offset.
1379
23.6k
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1380
23.6k
    return true;
1381
23.6k
  }
1382
9.17k
1383
9.17k
  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1384
9.04k
    // Legal offset for instruction.
1385
9.04k
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1386
9.04k
    return true;
1387
9.04k
  }
1388
132
1389
132
  // Illegal offset, store it in soffset.
1390
132
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1391
132
  SOffset =
1392
132
      SDValue(CurDAG->getMachineNode(
1393
132
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
1394
132
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1395
132
              0);
1396
132
  return true;
1397
132
}
1398
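At the end of SelectMUBUF above, a constant offset either stays in the instruction's immediate field or is moved into soffset via S_MOV_B32. A minimal model of that decision, assuming isLegalMUBUFImmOffset means "fits an unsigned 12-bit field"; the enum and helper are illustrative, not part of this file:

  #include <cstdint>

  enum class MUBUFOffsetKind { ImmediateField, SOffsetRegister };

  // Where a constant buffer offset ends up after selection.
  static MUBUFOffsetKind classifyMUBUFOffset(uint64_t C) {
    return C < 4096 ? MUBUFOffsetKind::ImmediateField    // legal immediate offset
                    : MUBUFOffsetKind::SOffsetRegister;  // S_MOV_B32 into soffset
  }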
1399
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1400
                                           SDValue &VAddr, SDValue &SOffset,
1401
                                           SDValue &Offset, SDValue &GLC,
1402
                                           SDValue &SLC, SDValue &TFE,
1403
35.3k
                                           SDValue &DLC) const {
1404
35.3k
  SDValue Ptr, Offen, Idxen, Addr64;
1405
35.3k
1406
35.3k
  // The addr64 bit was removed for Volcanic Islands.
1407
35.3k
  if (!Subtarget->hasAddr64())
1408
18.4k
    return false;
1409
16.8k
1410
16.8k
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1411
16.8k
              GLC, SLC, TFE, DLC))
1412
3.18k
    return false;
1413
13.6k
1414
13.6k
  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1415
13.6k
  if (C->getSExtValue()) {
1416
3.78k
    SDLoc DL(Addr);
1417
3.78k
1418
3.78k
    const SITargetLowering& Lowering =
1419
3.78k
      *static_cast<const SITargetLowering*>(getTargetLowering());
1420
3.78k
1421
3.78k
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1422
3.78k
    return true;
1423
3.78k
  }
1424
9.90k
1425
9.90k
  return false;
1426
9.90k
}
1427
1428
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1429
                                           SDValue &VAddr, SDValue &SOffset,
1430
                                           SDValue &Offset,
1431
635
                                           SDValue &SLC) const {
1432
635
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1433
635
  SDValue GLC, TFE, DLC;
1434
635
1435
635
  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
1436
635
}
1437
1438
328
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1439
328
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1440
328
  return PSV && PSV->isStack();
1441
328
}
1442
1443
7.55k
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1444
7.55k
  const MachineFunction &MF = CurDAG->getMachineFunction();
1445
7.55k
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1446
7.55k
1447
7.55k
  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1448
6.24k
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1449
6.24k
                                              FI->getValueType(0));
1450
6.24k
1451
6.24k
    // If we can resolve this to a frame index access, this will be relative to
1452
6.24k
    // either the stack or frame pointer SGPR.
1453
6.24k
    return std::make_pair(
1454
6.24k
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
1455
6.24k
  }
1456
1.30k
1457
1.30k
  // If we don't know this private access is a local stack object, it needs to
1458
1.30k
  // be relative to the entry point's scratch wave offset register.
1459
1.30k
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
1460
1.30k
                                               MVT::i32));
1461
1.30k
}
1462
1463
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1464
                                                 SDValue Addr, SDValue &Rsrc,
1465
                                                 SDValue &VAddr, SDValue &SOffset,
1466
7.55k
                                                 SDValue &ImmOffset) const {
1467
7.55k
1468
7.55k
  SDLoc DL(Addr);
1469
7.55k
  MachineFunction &MF = CurDAG->getMachineFunction();
1470
7.55k
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1471
7.55k
1472
7.55k
  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1473
7.55k
1474
7.55k
  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1475
6
    unsigned Imm = CAddr->getZExtValue();
1476
6
1477
6
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1478
6
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1479
6
                                                        DL, MVT::i32, HighBits);
1480
6
    VAddr = SDValue(MovHighBits, 0);
1481
6
1482
6
    // In a call sequence, stores to the argument stack area are relative to the
1483
6
    // stack pointer.
1484
6
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1485
6
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1486
6
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1487
6
1488
6
    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1489
6
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1490
6
    return true;
1491
6
  }
1492
7.55k
1493
7.55k
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1494
6.24k
    // (add n0, c1)
1495
6.24k
1496
6.24k
    SDValue N0 = Addr.getOperand(0);
1497
6.24k
    SDValue N1 = Addr.getOperand(1);
1498
6.24k
1499
6.24k
    // Offsets in vaddr must be positive if range checking is enabled.
1500
6.24k
    //
1501
6.24k
    // The total computation of vaddr + soffset + offset must not overflow.  If
1502
6.24k
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1503
6.24k
    // overflowing.
1504
6.24k
    //
1505
6.24k
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1506
6.24k
    // always perform a range check. If a negative vaddr base index was used,
1507
6.24k
    // this would fail the range check. The overall address computation would
1508
6.24k
    // compute a valid address, but this doesn't happen due to the range
1509
6.24k
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
1510
6.24k
    //
1511
6.24k
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1512
6.24k
    // MUBUF vaddr, but not on older subtargets which can only do this if the
1513
6.24k
    // sign bit is known 0.
1514
6.24k
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1515
6.24k
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1516
6.24k
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1517
6.22k
         CurDAG->SignBitIsZero(N0))) {
1518
5.62k
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1519
5.62k
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1520
5.62k
      return true;
1521
5.62k
    }
1522
1.92k
  }
1523
1.92k
1524
1.92k
  // (node)
1525
1.92k
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1526
1.92k
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1527
1.92k
  return true;
1528
1.92k
}
1529
1530
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1531
                                                  SDValue Addr,
1532
                                                  SDValue &SRsrc,
1533
                                                  SDValue &SOffset,
1534
7.88k
                                                  SDValue &Offset) const {
1535
7.88k
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1536
7.88k
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1537
7.55k
    return false;
1538
322
1539
322
  SDLoc DL(Addr);
1540
322
  MachineFunction &MF = CurDAG->getMachineFunction();
1541
322
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1542
322
1543
322
  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1544
322
1545
322
  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1546
322
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1547
221
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1548
322
1549
322
  // FIXME: Get from MachinePointerInfo? We should only be using the frame
1550
322
  // offset if we know this is in a call sequence.
1551
322
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1552
322
1553
322
  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1554
322
  return true;
1555
322
}
1556
1557
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1558
                                           SDValue &SOffset, SDValue &Offset,
1559
                                           SDValue &GLC, SDValue &SLC,
1560
33.8k
                                           SDValue &TFE, SDValue &DLC) const {
1561
33.8k
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1562
33.8k
  const SIInstrInfo *TII =
1563
33.8k
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1564
33.8k
1565
33.8k
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1566
33.8k
              GLC, SLC, TFE, DLC))
1567
14.6k
    return false;
1568
19.1k
1569
19.1k
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1570
19.1k
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1571
19.1k
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1572
18.0k
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1573
18.0k
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
1574
18.0k
    SDLoc DL(Addr);
1575
18.0k
1576
18.0k
    const SITargetLowering& Lowering =
1577
18.0k
      *static_cast<const SITargetLowering*>(getTargetLowering());
1578
18.0k
1579
18.0k
    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1580
18.0k
    return true;
1581
18.0k
  }
1582
1.08k
  return false;
1583
1.08k
}
1584
1585
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1586
                                           SDValue &Soffset, SDValue &Offset
1587
8
                                           ) const {
1588
8
  SDValue GLC, SLC, TFE, DLC;
1589
8
1590
8
  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
1591
8
}
1592
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1593
                                           SDValue &Soffset, SDValue &Offset,
1594
566
                                           SDValue &SLC) const {
1595
566
  SDValue GLC, TFE, DLC;
1596
566
1597
566
  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
1598
566
}
1599
1600
template <bool IsSigned>
1601
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1602
                                          SDValue Addr,
1603
                                          SDValue &VAddr,
1604
                                          SDValue &Offset,
1605
19.1k
                                          SDValue &SLC) const {
1606
19.1k
  return static_cast<const SITargetLowering*>(getTargetLowering())->
1607
19.1k
    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
1608
19.1k
}
AMDGPUISelDAGToDAG.cpp:bool (anonymous namespace)::AMDGPUDAGToDAGISel::SelectFlatOffset<true>(llvm::SDNode*, llvm::SDValue, llvm::SDValue&, llvm::SDValue&, llvm::SDValue&) const
Line
Count
Source
1605
6.18k
                                          SDValue &SLC) const {
1606
6.18k
  return static_cast<const SITargetLowering*>(getTargetLowering())->
1607
6.18k
    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
1608
6.18k
}
AMDGPUISelDAGToDAG.cpp:bool (anonymous namespace)::AMDGPUDAGToDAGISel::SelectFlatOffset<false>(llvm::SDNode*, llvm::SDValue, llvm::SDValue&, llvm::SDValue&, llvm::SDValue&) const
Line
Count
Source
1605
12.9k
                                          SDValue &SLC) const {
1606
12.9k
  return static_cast<const SITargetLowering*>(getTargetLowering())->
1607
12.9k
    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
1608
12.9k
}
1609
1610
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
1611
                                          SDValue Addr,
1612
                                          SDValue &VAddr,
1613
                                          SDValue &Offset,
1614
2.06k
                                          SDValue &SLC) const {
1615
2.06k
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
1616
2.06k
}
1617
1618
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
1619
                                          SDValue Addr,
1620
                                          SDValue &VAddr,
1621
                                          SDValue &Offset,
1622
215
                                          SDValue &SLC) const {
1623
215
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
1624
215
}
1625
1626
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1627
26.8k
                                          SDValue &Offset, bool &Imm) const {
1628
26.8k
1629
26.8k
  // FIXME: Handle non-constant offsets.
1630
26.8k
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1631
26.8k
  if (!C)
1632
71
    return false;
1633
26.7k
1634
26.7k
  SDLoc SL(ByteOffsetNode);
1635
26.7k
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1636
26.7k
  int64_t ByteOffset = C->getSExtValue();
1637
26.7k
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1638
26.7k
1639
26.7k
  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1640
26.6k
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1641
26.6k
    Imm = true;
1642
26.6k
    return true;
1643
26.6k
  }
1644
93
1645
93
  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1646
13
    return false;
1647
80
1648
80
  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1649
30
    // 32-bit Immediates are supported on Sea Islands.
1650
30
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1651
50
  } else {
1652
50
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1653
50
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1654
50
                                            C32Bit), 0);
1655
50
  }
1656
80
  Imm = false;
1657
80
  return true;
1658
80
}
1659
1660
32.3k
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1661
32.3k
  if (Addr.getValueType() != MVT::i32)
1662
32.1k
    return Addr;
1663
122
1664
122
  // Zero-extend a 32-bit address.
1665
122
  SDLoc SL(Addr);
1666
122
1667
122
  const MachineFunction &MF = CurDAG->getMachineFunction();
1668
122
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1669
122
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1670
122
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1671
122
1672
122
  const SDValue Ops[] = {
1673
122
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1674
122
    Addr,
1675
122
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1676
122
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1677
122
            0),
1678
122
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1679
122
  };
1680
122
1681
122
  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1682
122
                                        Ops), 0);
1683
122
}
1684
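Numerically, the REG_SEQUENCE built above just places the original 32-bit address in the low half and the per-function high bits in the upper half of a 64-bit base. A trivial sketch of that composition (purely illustrative):

  #include <cstdint>

  // sub0 holds the 32-bit address, sub1 holds the aperture-derived high bits.
  static uint64_t compose64BitAddress(uint32_t Addr, uint32_t AddrHiBits) {
    return (uint64_t(AddrHiBits) << 32) | Addr;
  }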
1685
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1686
32.3k
                                     SDValue &Offset, bool &Imm) const {
1687
32.3k
  SDLoc SL(Addr);
1688
32.3k
1689
32.3k
  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1690
32.3k
  // wraparound, because s_load instructions perform the addition in 64 bits.
1691
32.3k
  if ((Addr.getValueType() != MVT::i32 ||
1692
32.3k
       
Addr->getFlags().hasNoUnsignedWrap()122
) &&
1693
32.3k
      
CurDAG->isBaseWithConstantOffset(Addr)32.2k
) {
1694
26.2k
    SDValue N0 = Addr.getOperand(0);
1695
26.2k
    SDValue N1 = Addr.getOperand(1);
1696
26.2k
1697
26.2k
    if (SelectSMRDOffset(N1, Offset, Imm)) {
1698
26.2k
      SBase = Expand32BitAddress(N0);
1699
26.2k
      return true;
1700
26.2k
    }
1701
6.04k
  }
1702
6.04k
  SBase = Expand32BitAddress(Addr);
1703
6.04k
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1704
6.04k
  Imm = true;
1705
6.04k
  return true;
1706
6.04k
}
1707
1708
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1709
32.2k
                                       SDValue &Offset) const {
1710
32.2k
  bool Imm;
1711
32.2k
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1712
32.2k
}
1713
1714
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1715
9
                                         SDValue &Offset) const {
1716
9
1717
9
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1718
0
    return false;
1719
9
1720
9
  bool Imm;
1721
9
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1722
0
    return false;
1723
9
1724
9
  return !Imm && isa<ConstantSDNode>(Offset);
1725
9
}
1726
1727
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1728
19
                                        SDValue &Offset) const {
1729
19
  bool Imm;
1730
19
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1731
19
         !isa<ConstantSDNode>(Offset);
1732
19
}
1733
1734
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1735
532
                                             SDValue &Offset) const {
1736
532
  bool Imm;
1737
532
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1738
532
}
1739
1740
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1741
17
                                               SDValue &Offset) const {
1742
17
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1743
0
    return false;
1744
17
1745
17
  bool Imm;
1746
17
  if (!SelectSMRDOffset(Addr, Offset, Imm))
1747
11
    return false;
1748
6
1749
6
  return !Imm && isa<ConstantSDNode>(Offset);
1750
6
}
1751
1752
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1753
                                            SDValue &Base,
1754
76.1k
                                            SDValue &Offset) const {
1755
76.1k
  SDLoc DL(Index);
1756
76.1k
1757
76.1k
  if (CurDAG->isBaseWithConstantOffset(Index)) {
1758
81
    SDValue N0 = Index.getOperand(0);
1759
81
    SDValue N1 = Index.getOperand(1);
1760
81
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1761
81
1762
81
    // (add n0, c0)
1763
81
    // Don't peel off the offset (c0) if doing so could possibly lead
1764
81
    // the base (n0) to be negative.
1765
81
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1766
41
      Base = N0;
1767
41
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1768
41
      return true;
1769
41
    }
1770
76.0k
  }
1771
76.0k
1772
76.0k
  if (isa<ConstantSDNode>(Index))
1773
76.0k
    return false;
1774
73
1775
73
  Base = Index;
1776
73
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1777
73
  return true;
1778
73
}
1779
1780
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1781
                                     SDValue Val, uint32_t Offset,
1782
5.09k
                                     uint32_t Width) {
1783
5.09k
  // Transformation function, pack the offset and width of a BFE into
1784
5.09k
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1785
5.09k
  // source, bits [5:0] contain the offset and bits [22:16] the width.
1786
5.09k
  uint32_t PackedVal = Offset | (Width << 16);
1787
5.09k
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1788
5.09k
1789
5.09k
  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1790
5.09k
}
1791
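A self-contained illustration of the operand packing described in the comment above; packBFEOperand is a made-up name and the masks simply spell out the field widths:

  #include <cassert>
  #include <cstdint>

  // S_BFE_U32/S_BFE_I32 take offset and width packed into one operand:
  // bits [5:0] = offset, bits [22:16] = width.
  static uint32_t packBFEOperand(uint32_t Offset, uint32_t Width) {
    return (Offset & 0x3f) | ((Width & 0x7f) << 16);
  }

  int main() {
    // Extracting 8 bits starting at bit 16 packs to 0x00080010.
    assert(packBFEOperand(16, 8) == 0x00080010);
    return 0;
  }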
1792
492
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1793
492
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1794
492
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1795
492
  // Predicate: 0 < b <= c < 32
1796
492
1797
492
  const SDValue &Shl = N->getOperand(0);
1798
492
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1799
492
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1800
492
1801
492
  if (B && C) {
1802
472
    uint32_t BVal = B->getZExtValue();
1803
472
    uint32_t CVal = C->getZExtValue();
1804
472
1805
472
    if (0 < BVal && BVal <= CVal && CVal < 32) {
1806
461
      bool Signed = N->getOpcode() == ISD::SRA;
1807
461
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1808
461
1809
461
      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1810
461
                              32 - CVal));
1811
461
      return;
1812
461
    }
1813
31
  }
1814
31
  SelectCode(N);
1815
31
}
1816
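To make the arithmetic above concrete: under the predicate 0 < b <= c < 32, the extracted field starts at bit (c - b) and is (32 - c) bits wide. A tiny worked example, assuming nothing beyond that formula:

  #include <cassert>

  int main() {
    // "(a << b) >> c" with b = 24, c = 24 is just an extension of the low
    // 8 bits: offset = c - b = 0, width = 32 - c = 8.
    unsigned b = 24, c = 24;
    assert(c - b == 0 && 32 - c == 8);
    return 0;
  }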
1817
22.3k
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1818
22.3k
  switch (N->getOpcode()) {
1819
22.3k
  case ISD::AND:
1820
8.30k
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
1821
2.91k
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1822
2.91k
      // Predicate: isMask(mask)
1823
2.91k
      const SDValue &Srl = N->getOperand(0);
1824
2.91k
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1825
2.91k
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1826
2.91k
1827
2.91k
      if (Shift && Mask) {
1828
2.90k
        uint32_t ShiftVal = Shift->getZExtValue();
1829
2.90k
        uint32_t MaskVal = Mask->getZExtValue();
1830
2.90k
1831
2.90k
        if (isMask_32(MaskVal)) {
1832
2.72k
          uint32_t WidthVal = countPopulation(MaskVal);
1833
2.72k
1834
2.72k
          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1835
2.72k
                                  Srl.getOperand(0), ShiftVal, WidthVal));
1836
2.72k
          return;
1837
2.72k
        }
1838
5.57k
      }
1839
2.91k
    }
1840
5.57k
    break;
1841
6.76k
  case ISD::SRL:
1842
6.76k
    if (N->getOperand(0).getOpcode() == ISD::AND) {
1843
374
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1844
374
      // Predicate: isMask(mask >> b)
1845
374
      const SDValue &And = N->getOperand(0);
1846
374
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1847
374
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1848
374
1849
374
      if (Shift && Mask) {
1850
373
        uint32_t ShiftVal = Shift->getZExtValue();
1851
373
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1852
373
1853
373
        if (isMask_32(MaskVal)) {
1854
373
          uint32_t WidthVal = countPopulation(MaskVal);
1855
373
1856
373
          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1857
373
                                  And.getOperand(0), ShiftVal, WidthVal));
1858
373
          return;
1859
373
        }
1860
6.39k
      }
1861
6.39k
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1862
16
      SelectS_BFEFromShifts(N);
1863
16
      return;
1864
16
    }
1865
6.38k
    break;
1866
6.38k
  case ISD::SRA:
1867
2.61k
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
1868
476
      SelectS_BFEFromShifts(N);
1869
476
      return;
1870
476
    }
1871
2.14k
    break;
1872
2.14k
1873
4.70k
  case ISD::SIGN_EXTEND_INREG: {
1874
4.70k
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1875
4.70k
    SDValue Src = N->getOperand(0);
1876
4.70k
    if (Src.getOpcode() != ISD::SRL)
1877
3.29k
      break;
1878
1.40k
1879
1.40k
    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1880
1.40k
    if (!Amt)
1881
0
      break;
1882
1.40k
1883
1.40k
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1884
1.40k
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1885
1.40k
                            Amt->getZExtValue(), Width));
1886
1.40k
    return;
1887
1.40k
  }
1888
17.3k
  }
1889
17.3k
1890
17.3k
  SelectCode(N);
1891
17.3k
}
1892
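A quick check of the srl/and case above: shifting and then masking with a contiguous low mask is the same as a bitfield extract whose offset is the shift amount and whose width is popcount(mask). A small standalone verification, illustrative only:

  #include <bitset>
  #include <cassert>
  #include <cstdint>

  int main() {
    // "(a >> shift) & mask" with isMask(mask) == true.
    uint32_t a = 0xAABBCCDD, shift = 8, mask = 0xFF;
    uint32_t viaShiftAndMask = (a >> shift) & mask;
    // Equivalent bitfield extract: offset = shift, width = popcount(mask).
    uint32_t width = std::bitset<32>(mask).count();
    uint32_t viaBFE = (a >> shift) & ((1u << width) - 1);
    assert(viaShiftAndMask == viaBFE && viaBFE == 0xCC);
    return 0;
  }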
1893
603
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1894
603
  assert(N->getOpcode() == ISD::BRCOND);
1895
603
  if (!N->hasOneUse())
1896
0
    return false;
1897
603
1898
603
  SDValue Cond = N->getOperand(1);
1899
603
  if (Cond.getOpcode() == ISD::CopyToReg)
1900
0
    Cond = Cond.getOperand(2);
1901
603
1902
603
  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1903
64
    return false;
1904
539
1905
539
  MVT VT = Cond.getOperand(0).getSimpleValueType();
1906
539
  if (VT == MVT::i32)
1907
427
    return true;
1908
112
1909
112
  if (VT == MVT::i64) {
1910
31
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1911
31
1912
31
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1913
31
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1914
31
  }
1915
81
1916
81
  return false;
1917
81
}
1918
1919
692
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1920
692
  SDValue Cond = N->getOperand(1);
1921
692
1922
692
  if (Cond.isUndef()) {
1923
89
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1924
89
                         N->getOperand(2), N->getOperand(0));
1925
89
    return;
1926
89
  }
1927
603
1928
603
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1929
603
  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1930
603
1931
603
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1932
603
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1933
603
  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
1934
603
  SDLoc SL(N);
1935
603
1936
603
  if (!UseSCCBr) {
1937
163
    // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
1938
163
    // analyzed what generates the vcc value, so we do not know whether vcc
1939
163
    // bits for disabled lanes are 0.  Thus we need to mask out bits for
1940
163
    // disabled lanes.
1941
163
    //
1942
163
    // For the case that we select S_CBRANCH_SCC1 and it gets
1943
163
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1944
163
    // SIInstrInfo::moveToVALU which inserts the S_AND).
1945
163
    //
1946
163
    // We could add an analysis of what generates the vcc value here and omit
1947
163
    // the S_AND when it is unnecessary. But it would be better to add a separate
1948
163
    // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1949
163
    // catches both cases.
1950
163
    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
1951
163
                                                         : AMDGPU::S_AND_B64,
1952
163
                     SL, MVT::i1,
1953
163
                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
1954
163
                                                        : AMDGPU::EXEC,
1955
163
                                         MVT::i1),
1956
163
                    Cond),
1957
163
                   0);
1958
163
  }
1959
603
1960
603
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1961
603
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1962
603
                       N->getOperand(2), // Basic Block
1963
603
                       VCC.getValue(0));
1964
603
}
1965
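The long comment above boils down to one invariant: when branching on a VCC value of unknown origin, only the bits of enabled lanes may decide the branch. A conceptual model of what the inserted AND with EXEC plus S_CBRANCH_VCCNZ compute (wave64 widths shown; names are illustrative):

  #include <cstdint>

  // After the condition is ANDed with EXEC, the VCCNZ branch is taken iff any
  // *enabled* lane has its condition bit set; disabled lanes cannot leak in.
  static bool takesVCCNZBranch(uint64_t Cond, uint64_t Exec) {
    uint64_t VCC = Cond & Exec;
    return VCC != 0;
  }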
1966
2.78k
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1967
2.78k
  MVT VT = N->getSimpleValueType(0);
1968
2.78k
  bool IsFMA = N->getOpcode() == ISD::FMA;
1969
2.78k
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1970
2.11k
                         !Subtarget->hasFmaMixInsts()) ||
1971
2.78k
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
1972
2.56k
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1973
2.56k
    SelectCode(N);
1974
2.56k
    return;
1975
2.56k
  }
1976
224
1977
224
  SDValue Src0 = N->getOperand(0);
1978
224
  SDValue Src1 = N->getOperand(1);
1979
224
  SDValue Src2 = N->getOperand(2);
1980
224
  unsigned Src0Mods, Src1Mods, Src2Mods;
1981
224
1982
224
  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1983
224
  // using the conversion from f16.
1984
224
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1985
224
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1986
224
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1987
224
1988
224
  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1989
224
         "fmad selected with denormals enabled");
1990
224
  // TODO: We can select this with f32 denormals enabled if all the sources are
1991
224
  // converted from f16 (in which case fmad isn't legal).
1992
224
1993
224
  if (Sel0 || Sel1 || Sel2) {
1994
130
    // For dummy operands.
1995
130
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1996
130
    SDValue Ops[] = {
1997
130
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1998
130
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1999
130
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2000
130
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2001
130
      Zero, Zero
2002
130
    };
2003
130
2004
130
    CurDAG->SelectNodeTo(N,
2005
130
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2006
130
                         MVT::f32, Ops);
2007
130
  } else {
2008
94
    SelectCode(N);
2009
94
  }
2010
224
}
2011
2012
// This is here because there isn't a way to use the generated sub0_sub1 as the
2013
// subreg index to EXTRACT_SUBREG in tablegen.
2014
690
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2015
690
  MemSDNode *Mem = cast<MemSDNode>(N);
2016
690
  unsigned AS = Mem->getAddressSpace();
2017
690
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2018
638
    SelectCode(N);
2019
638
    return;
2020
638
  }
2021
52
2022
52
  MVT VT = N->getSimpleValueType(0);
2023
52
  bool Is32 = (VT == MVT::i32);
2024
52
  SDLoc SL(N);
2025
52
2026
52
  MachineSDNode *CmpSwap = nullptr;
2027
52
  if (Subtarget->hasAddr64()) {
2028
17
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2029
17
2030
17
    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2031
8
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2032
8
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2033
8
      SDValue CmpVal = Mem->getOperand(2);
2034
8
2035
8
      // XXX - Do we care about glue operands?
2036
8
2037
8
      SDValue Ops[] = {
2038
8
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2039
8
      };
2040
8
2041
8
      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2042
8
    }
2043
17
  }
2044
52
2045
52
  if (!CmpSwap) {
2046
44
    SDValue SRsrc, SOffset, Offset, SLC;
2047
44
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2048
18
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2049
18
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2050
18
2051
18
      SDValue CmpVal = Mem->getOperand(2);
2052
18
      SDValue Ops[] = {
2053
18
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2054
18
      };
2055
18
2056
18
      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2057
18
    }
2058
44
  }
2059
52
2060
52
  if (!CmpSwap) {
2061
26
    SelectCode(N);
2062
26
    return;
2063
26
  }
2064
26
2065
26
  MachineMemOperand *MMO = Mem->getMemOperand();
2066
26
  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2067
26
2068
26
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2069
26
  SDValue Extract
2070
26
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2071
26
2072
26
  ReplaceUses(SDValue(N, 0), Extract);
2073
26
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2074
26
  CurDAG->RemoveDeadNode(N);
2075
26
}
2076
2077
72
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2078
72
  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2079
72
  // be copied to an SGPR with readfirstlane.
2080
72
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2081
36
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2082
72
2083
72
  SDValue Chain = N->getOperand(0);
2084
72
  SDValue Ptr = N->getOperand(2);
2085
72
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2086
72
  MachineMemOperand *MMO = M->getMemOperand();
2087
72
  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2088
72
2089
72
  SDValue Offset;
2090
72
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2091
40
    SDValue PtrBase = Ptr.getOperand(0);
2092
40
    SDValue PtrOffset = Ptr.getOperand(1);
2093
40
2094
40
    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2095
40
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2096
22
      N = glueCopyToM0(N, PtrBase);
2097
22
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2098
22
    }
2099
40
  }
2100
72
2101
72
  if (!Offset) {
2102
50
    N = glueCopyToM0(N, Ptr);
2103
50
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2104
50
  }
2105
72
2106
72
  SDValue Ops[] = {
2107
72
    Offset,
2108
72
    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2109
72
    Chain,
2110
72
    N->getOperand(N->getNumOperands() - 1) // New glue
2111
72
  };
2112
72
2113
72
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2114
72
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2115
72
}
2116
2117
179
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2118
179
  switch (IntrID) {
2119
179
  case Intrinsic::amdgcn_ds_gws_init:
2120
60
    return AMDGPU::DS_GWS_INIT;
2121
179
  case Intrinsic::amdgcn_ds_gws_barrier:
2122
99
    return AMDGPU::DS_GWS_BARRIER;
2123
179
  case Intrinsic::amdgcn_ds_gws_sema_v:
2124
5
    return AMDGPU::DS_GWS_SEMA_V;
2125
179
  case Intrinsic::amdgcn_ds_gws_sema_br:
2126
5
    return AMDGPU::DS_GWS_SEMA_BR;
2127
179
  case Intrinsic::amdgcn_ds_gws_sema_p:
2128
5
    return AMDGPU::DS_GWS_SEMA_P;
2129
179
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2130
5
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2131
179
  default:
2132
0
    llvm_unreachable("not a gws intrinsic");
2133
179
  }
2134
179
}
2135
2136
180
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2137
180
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2138
180
      !Subtarget->hasGWSSemaReleaseAll()) {
2139
1
    // Let this error.
2140
1
    SelectCode(N);
2141
1
    return;
2142
1
  }
2143
179
2144
179
  // Chain, intrinsic ID, vsrc, offset
2145
179
  const bool HasVSrc = N->getNumOperands() == 4;
2146
179
  assert(HasVSrc || N->getNumOperands() == 3);
2147
179
2148
179
  SDLoc SL(N);
2149
179
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2150
179
  int ImmOffset = 0;
2151
179
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2152
179
  MachineMemOperand *MMO = M->getMemOperand();
2153
179
2154
179
  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2155
179
  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2156
179
2157
179
  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2158
179
  // offset field) % 64. Some versions of the programming guide omit the m0
2159
179
  // part, or claim it's from offset 0.
2160
179
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2161
128
    // If we have a constant offset, try to use the 0 in m0 as the base.
2162
128
    // TODO: Look into changing the default m0 initialization value. If the
2163
128
    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2164
128
    // the immediate offset.
2165
128
    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2166
128
    ImmOffset = ConstOffset->getZExtValue();
2167
128
  } else {
2168
51
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2169
24
      ImmOffset = BaseOffset.getConstantOperandVal(1);
2170
24
      BaseOffset = BaseOffset.getOperand(0);
2171
24
    }
2172
51
2173
51
    // Prefer to do the shift in an SGPR since it should be possible to use m0
2174
51
    // as the result directly. If it's already an SGPR, it will be eliminated
2175
51
    // later.
2176
51
    SDNode *SGPROffset
2177
51
      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2178
51
                               BaseOffset);
2179
51
    // Shift to offset in m0
2180
51
    SDNode *M0Base
2181
51
      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2182
51
                               SDValue(SGPROffset, 0),
2183
51
                               CurDAG->getTargetConstant(16, SL, MVT::i32));
2184
51
    glueCopyToM0(N, SDValue(M0Base, 0));
2185
51
  }
2186
179
2187
179
  SDValue V0;
2188
179
  SDValue Chain = N->getOperand(0);
2189
179
  SDValue Glue;
2190
179
  if (HasVSrc) {
2191
164
    SDValue VSrc0 = N->getOperand(2);
2192
164
2193
164
    // The manual doesn't mention this, but it seems only v0 works.
2194
164
    V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
2195
164
2196
164
    SDValue CopyToV0 = CurDAG->getCopyToReg(
2197
164
      N->getOperand(0), SL, V0, VSrc0,
2198
164
      N->getOperand(N->getNumOperands() - 1));
2199
164
    Chain = CopyToV0;
2200
164
    Glue = CopyToV0.getValue(1);
2201
164
  }
2202
179
2203
179
  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2204
179
2205
179
  // TODO: Can this just be removed from the instruction?
2206
179
  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2207
179
2208
179
  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2209
179
  SmallVector<SDValue, 5> Ops;
2210
179
  if (HasVSrc)
2211
164
    Ops.push_back(V0);
2212
179
  Ops.push_back(OffsetField);
2213
179
  Ops.push_back(GDS);
2214
179
  Ops.push_back(Chain);
2215
179
2216
179
  if (HasVSrc)
2217
164
    Ops.push_back(Glue);
2218
179
2219
179
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2220
179
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2221
179
}
2222
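A conceptual model of the addressing described in the comment above, ignoring the opaque per-ISA base: the hardware adds m0[21:16] and the instruction's offset field modulo 64. That is why a constant offset can go straight into the offset field with m0 = 0, while a variable offset is shifted left by 16 and written to m0 instead. The helper below is purely illustrative:

  // Resource id used by a GWS op, given m0 and the instruction's offset field
  // (the opaque per-ISA base is omitted here).
  static unsigned gwsResourceId(unsigned M0, unsigned OffsetField) {
    unsigned BaseFromM0 = (M0 >> 16) & 0x3f;   // m0[21:16]
    return (BaseFromM0 + OffsetField) % 64;
  }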
2223
85
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2224
85
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2225
85
  switch (IntrID) {
2226
85
  case Intrinsic::amdgcn_ds_append:
2227
72
  case Intrinsic::amdgcn_ds_consume: {
2228
72
    if (N->getValueType(0) != MVT::i32)
2229
0
      break;
2230
72
    SelectDSAppendConsume(N, IntrID);
2231
72
    return;
2232
72
  }
2233
13
  }
2234
13
2235
13
  SelectCode(N);
2236
13
}
2237
2238
571
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2239
571
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2240
571
  switch (IntrID) {
2241
571
  case Intrinsic::amdgcn_ds_gws_init:
2242
180
  case Intrinsic::amdgcn_ds_gws_barrier:
2243
180
  case Intrinsic::amdgcn_ds_gws_sema_v:
2244
180
  case Intrinsic::amdgcn_ds_gws_sema_br:
2245
180
  case Intrinsic::amdgcn_ds_gws_sema_p:
2246
180
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2247
180
    SelectDS_GWS(N, IntrID);
2248
180
    return;
2249
391
  default:
2250
391
    break;
2251
391
  }
2252
391
2253
391
  SelectCode(N);
2254
391
}
2255
2256
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2257
29.5k
                                            unsigned &Mods) const {
2258
29.5k
  Mods = 0;
2259
29.5k
  Src = In;
2260
29.5k
2261
29.5k
  if (Src.getOpcode() == ISD::FNEG) {
2262
1.79k
    Mods |= SISrcMods::NEG;
2263
1.79k
    Src = Src.getOperand(0);
2264
1.79k
  }
2265
29.5k
2266
29.5k
  if (Src.getOpcode() == ISD::FABS) {
2267
762
    Mods |= SISrcMods::ABS;
2268
762
    Src = Src.getOperand(0);
2269
762
  }
2270
29.5k
2271
29.5k
  return true;
2272
29.5k
}
2273
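The order of the two checks above matters: an fneg wrapped around an fabs folds to NEG|ABS, while an fabs wrapped around an fneg only folds the ABS bit and leaves the inner fneg in the source. A compact sketch of the same peeling; the Expr type and the modifier values are made up for illustration:

  enum SrcModBits : unsigned { MOD_NEG = 1u << 0, MOD_ABS = 1u << 1 };

  struct Expr {
    bool IsFNeg = false;
    bool IsFAbs = false;
    const Expr *Operand = nullptr;
  };

  // Strip an outer fneg, then an fabs, accumulating source-modifier bits,
  // mirroring SelectVOP3ModsImpl above.
  static unsigned peelVOP3Mods(const Expr *&E) {
    unsigned Mods = 0;
    if (E->IsFNeg) { Mods |= MOD_NEG; E = E->Operand; }
    if (E->IsFAbs) { Mods |= MOD_ABS; E = E->Operand; }
    return Mods;
  }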
2274
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2275
28.3k
                                        SDValue &SrcMods) const {
2276
28.3k
  unsigned Mods;
2277
28.3k
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2278
28.3k
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2279
28.3k
    return true;
2280
28.3k
  }
2281
0
2282
0
  return false;
2283
0
}
2284
2285
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2286
246
                                             SDValue &SrcMods) const {
2287
246
  SelectVOP3Mods(In, Src, SrcMods);
2288
246
  return isNoNanSrc(Src);
2289
246
}
2290
2291
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2292
9.51k
                                            SDValue &SrcMods) const {
2293
9.51k
  if (In.getValueType() == MVT::f32)
2294
1.08k
    return SelectVOP3Mods(In, Src, SrcMods);
2295
8.43k
  Src = In;
2296
8.43k
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2297
8.43k
  return true;
2298
8.43k
}
2299
2300
4.76k
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2301
4.76k
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2302
325
    return false;
2303
4.44k
2304
4.44k
  Src = In;
2305
4.44k
  return true;
2306
4.44k
}
2307
2308
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2309
                                         SDValue &SrcMods, SDValue &Clamp,
2310
12.6k
                                         SDValue &Omod) const {
2311
12.6k
  SDLoc DL(In);
2312
12.6k
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2313
12.6k
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2314
12.6k
2315
12.6k
  return SelectVOP3Mods(In, Src, SrcMods);
2316
12.6k
}
2317
2318
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2319
                                                   SDValue &SrcMods,
2320
                                                   SDValue &Clamp,
2321
64
                                                   SDValue &Omod) const {
2322
64
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2323
64
  return SelectVOP3Mods(In, Src, SrcMods);
2324
64
}
2325
2326
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2327
602
                                         SDValue &Clamp, SDValue &Omod) const {
2328
602
  Src = In;
2329
602
2330
602
  SDLoc DL(In);
2331
602
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2332
602
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2333
602
2334
602
  return true;
2335
602
}
2336
2337
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2338
1.44k
                                         SDValue &SrcMods) const {
2339
1.44k
  unsigned Mods = 0;
2340
1.44k
  Src = In;
2341
1.44k
2342
1.44k
  if (Src.getOpcode() == ISD::FNEG) {
2343
13
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2344
13
    Src = Src.getOperand(0);
2345
13
  }
2346
1.44k
2347
1.44k
  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2348
365
    unsigned VecMods = Mods;
2349
365
2350
365
    SDValue Lo = stripBitcast(Src.getOperand(0));
2351
365
    SDValue Hi = stripBitcast(Src.getOperand(1));
2352
365
2353
365
    if (Lo.getOpcode() == ISD::FNEG) {
2354
11
      Lo = stripBitcast(Lo.getOperand(0));
2355
11
      Mods ^= SISrcMods::NEG;
2356
11
    }
2357
365
2358
365
    if (Hi.getOpcode() == ISD::FNEG) {
2359
11
      Hi = stripBitcast(Hi.getOperand(0));
2360
11
      Mods ^= SISrcMods::NEG_HI;
2361
11
    }
2362
365
2363
365
    if (isExtractHiElt(Lo, Lo))
2364
15
      Mods |= SISrcMods::OP_SEL_0;
2365
365
2366
365
    if (isExtractHiElt(Hi, Hi))
2367
15
      Mods |= SISrcMods::OP_SEL_1;
2368
365
2369
365
    Lo = stripExtractLoElt(Lo);
2370
365
    Hi = stripExtractLoElt(Hi);
2371
365
2372
365
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2373
34
      // Really a scalar input. Just select from the low half of the register to
2374
34
      // avoid packing.
2375
34
2376
34
      Src = Lo;
2377
34
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2378
34
      return true;
2379
34
    }
2380
331
2381
331
    Mods = VecMods;
2382
331
  }
2383
1.44k
2384
1.44k
  // Packed instructions do not have abs modifiers.
2385
1.44k
  Mods |= SISrcMods::OP_SEL_1;
2386
1.40k
2387
1.40k
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2388
1.40k
  return true;
2389
1.44k
}
2390
2391
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2392
                                          SDValue &SrcMods,
2393
648
                                          SDValue &Clamp) const {
2394
648
  SDLoc SL(In);
2395
648
2396
648
  // FIXME: Handle clamp and op_sel
2397
648
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2398
648
2399
648
  return SelectVOP3PMods(In, Src, SrcMods);
2400
648
}
2401
2402
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2403
72
                                         SDValue &SrcMods) const {
2404
72
  Src = In;
2405
72
  // FIXME: Handle op_sel
2406
72
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2407
72
  return true;
2408
72
}
2409
2410
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2411
                                          SDValue &SrcMods,
2412
24
                                          SDValue &Clamp) const {
2413
24
  SDLoc SL(In);
2414
24
2415
24
  // FIXME: Handle clamp
2416
24
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2417
24
2418
24
  return SelectVOP3OpSel(In, Src, SrcMods);
2419
24
}
2420
2421
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2422
102
                                             SDValue &SrcMods) const {
2423
102
  // FIXME: Handle op_sel
2424
102
  return SelectVOP3Mods(In, Src, SrcMods);
2425
102
}
2426
2427
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2428
                                              SDValue &SrcMods,
2429
34
                                              SDValue &Clamp) const {
2430
34
  SDLoc SL(In);
2431
34
2432
34
  // FIXME: Handle clamp
2433
34
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2434
34
2435
34
  return SelectVOP3OpSelMods(In, Src, SrcMods);
2436
34
}
2437
2438
// The return value is not whether the match is possible (which it always is),
2439
// but whether or not a conversion is really used.
2440
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2441
774
                                                   unsigned &Mods) const {
2442
774
  Mods = 0;
2443
774
  SelectVOP3ModsImpl(In, Src, Mods);
2444
774
2445
774
  if (Src.getOpcode() == ISD::FP_EXTEND) {
2446
404
    Src = Src.getOperand(0);
2447
404
    assert(Src.getValueType() == MVT::f16);
2448
404
    Src = stripBitcast(Src);
2449
404
2450
404
    // Be careful about folding modifiers if we already have an abs. fneg is
2451
404
    // applied last, so we don't want to apply an earlier fneg.
2452
404
    if ((Mods & SISrcMods::ABS) == 0) {
2453
398
      unsigned ModsTmp;
2454
398
      SelectVOP3ModsImpl(Src, Src, ModsTmp);
2455
398
2456
398
      if ((ModsTmp & SISrcMods::NEG) != 0)
2457
5
        Mods ^= SISrcMods::NEG;
2458
398
2459
398
      if ((ModsTmp & SISrcMods::ABS) != 0)
2460
6
        Mods |= SISrcMods::ABS;
2461
398
    }
2462
404
2463
404
    // op_sel/op_sel_hi decide the source type and source.
2464
404
    // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2465
404
    // If the sources's op_sel is set, it picks the high half of the source
2466
404
    // register.
2467
404
2468
404
    Mods |= SISrcMods::OP_SEL_1;
2469
404
    if (isExtractHiElt(Src, Src)) {
2470
134
      Mods |= SISrcMods::OP_SEL_0;
2471
134
2472
134
      // TODO: Should we try to look for neg/abs here?
2473
134
    }
2474
404
2475
404
    return true;
2476
404
  }
2477
370
2478
370
  return false;
2479
370
}
2480
2481
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2482
102
                                               SDValue &SrcMods) const {
2483
102
  unsigned Mods = 0;
2484
102
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2485
102
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2486
102
  return true;
2487
102
}
2488
2489
64
SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2490
64
  if (In.isUndef())
2491
2
    return CurDAG->getUNDEF(MVT::i32);
2492
62
2493
62
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2494
3
    SDLoc SL(In);
2495
3
    return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2496
3
  }
2497
59
2498
59
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2499
3
    SDLoc SL(In);
2500
3
    return CurDAG->getConstant(
2501
3
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2502
3
  }
2503
56
2504
56
  SDValue Src;
2505
56
  if (isExtractHiElt(In, Src))
2506
44
    return Src;
2507
12
2508
12
  return SDValue();
2509
12
}
2510
2511
28.5k
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2512
28.5k
  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2513
28.5k
2514
28.5k
  const SIRegisterInfo *SIRI =
2515
28.5k
    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2516
28.5k
  const SIInstrInfo * SII =
2517
28.5k
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2518
28.5k
2519
28.5k
  unsigned Limit = 0;
2520
28.5k
  bool AllUsesAcceptSReg = true;
2521
28.5k
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2522
38.9k
    Limit < 10 && U != E; ++U, ++Limit) {
2523
31.9k
    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2524
31.9k
2525
31.9k
    // If the register class is unknown, it could be an unknown
2526
31.9k
    // register class that needs to be an SGPR, e.g. an inline asm
2527
31.9k
    // constraint
2528
31.9k
    if (!RC || SIRI->isSGPRClass(RC))
2529
16.3k
      return false;
2530
15.5k
2531
15.5k
    if (RC != &AMDGPU::VS_32RegClass) {
2532
6.07k
      AllUsesAcceptSReg = false;
2533
6.07k
      SDNode * User = *U;
2534
6.07k
      if (User->isMachineOpcode()) {
2535
5.20k
        unsigned Opc = User->getMachineOpcode();
2536
5.20k
        MCInstrDesc Desc = SII->get(Opc);
2537
5.20k
        if (Desc.isCommutable()) {
2538
999
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2539
999
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2540
999
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2541
890
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2542
890
            const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2543
890
            if (CommutedRC == &AMDGPU::VS_32RegClass)
2544
890
              AllUsesAcceptSReg = true;
2545
890
          }
2546
999
        }
2547
5.20k
      }
2548
6.07k
      // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2549
6.07k
      // commuting current user. This means have at least one use
2550
6.07k
      // that strictly require VGPR. Thus, we will not attempt to commute
2551
6.07k
      // other user instructions.
2552
6.07k
      if (!AllUsesAcceptSReg)
2553
5.18k
        break;
2554
6.07k
    }
2555
15.5k
  }
2556
28.5k
  return !AllUsesAcceptSReg && (Limit < 10);
2557
28.5k
}
2558
2559
86.1k
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2560
86.1k
  auto Ld = cast<LoadSDNode>(N);
2561
86.1k
2562
86.1k
  return Ld->getAlignment() >= 4 &&
2563
86.1k
        (
2564
84.6k
          (
2565
84.6k
            (
2566
84.6k
              Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS       ||
2567
84.6k
              Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2568
84.6k
            )
2569
84.6k
            &&
2570
84.6k
            !N->isDivergent()
2571
84.6k
          )
2572
84.6k
          ||
2573
84.6k
          (
2574
30.1k
            Subtarget->getScalarizeGlobalBehavior() &&
2575
30.1k
            Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2576
30.1k
            !Ld->isVolatile() &&
2577
30.1k
            !N->isDivergent() &&
2578
30.1k
            static_cast<const SITargetLowering *>(
2579
3.40k
              getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2580
30.1k
          )
2581
84.6k
        );
2582
86.1k
}
2583
2584
30.8k
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2585
30.8k
  const AMDGPUTargetLowering& Lowering =
2586
30.8k
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2587
30.8k
  bool IsModified = false;
2588
33.2k
  do {
2589
33.2k
    IsModified = false;
2590
33.2k
2591
33.2k
    // Go over all selected nodes and try to fold them a bit more
2592
33.2k
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2593
1.27M
    while (Position != CurDAG->allnodes_end()) {
2594
1.24M
      SDNode *Node = &*Position++;
2595
1.24M
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2596
1.24M
      if (!MachineNode)
2597
588k
        continue;
2598
657k
2599
657k
      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2600
657k
      if (ResNode != Node) {
2601
35.3k
        if (ResNode)
2602
35.2k
          ReplaceUses(Node, ResNode);
2603
35.3k
        IsModified = true;
2604
35.3k
      }
2605
657k
    }
2606
33.2k
    CurDAG->RemoveDeadNodes();
2607
33.2k
  } while (IsModified);
2608
30.8k
}
2609
2610
2.29k
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2611
2.29k
  Subtarget = &MF.getSubtarget<R600Subtarget>();
2612
2.29k
  return SelectionDAGISel::runOnMachineFunction(MF);
2613
2.29k
}
2614
2615
3.75k
bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2616
3.75k
  if (!N->readMem())
2617
0
    return false;
2618
3.75k
  if (CbId == -1)
2619
0
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2620
0
           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2621
3.75k
2622
3.75k
  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2623
3.75k
}
2624
2625
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2626
6.52k
                                                         SDValue& IntPtr) {
2627
6.52k
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2628
6.52k
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2629
6.52k
                                       true);
2630
6.52k
    return true;
2631
6.52k
  }
2632
0
  return false;
2633
0
}
2634
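SelectGlobalValueConstantOffset above only matches when the whole address is a constant; the constant byte address is divided by 4, which suggests the resulting operand is consumed as an index of 32-bit words. A small worked example under that reading (names and values are illustrative only):

#include <cstdint>

int main() {
  // Byte address 16 becomes dword index 4, mirroring the division by 4 above.
  std::uint64_t ByteAddr = 16;
  std::uint64_t DwordIndex = ByteAddr / 4;
  return DwordIndex == 4 ? 0 : 1;
}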
2635
bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2636
0
    SDValue& BaseReg, SDValue &Offset) {
2637
0
  if (!isa<ConstantSDNode>(Addr)) {
2638
0
    BaseReg = Addr;
2639
0
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2640
0
    return true;
2641
0
  }
2642
0
  return false;
2643
0
}
2644
2645
85.0k
void R600DAGToDAGISel::Select(SDNode *N) {
2646
85.0k
  unsigned int Opc = N->getOpcode();
2647
85.0k
  if (N->isMachineOpcode()) {
2648
0
    N->setNodeId(-1);
2649
0
    return;   // Already selected.
2650
0
  }
2651
85.0k
2652
85.0k
  switch (Opc) {
2653
85.0k
  
default: break82.6k
;
2654
85.0k
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2655
2.45k
  case ISD::SCALAR_TO_VECTOR:
2656
2.45k
  case ISD::BUILD_VECTOR: {
2657
2.45k
    EVT VT = N->getValueType(0);
2658
2.45k
    unsigned NumVectorElts = VT.getVectorNumElements();
2659
2.45k
    unsigned RegClassID;
2660
2.45k
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2661
2.45k
    // that adds a 128-bit reg copy when going through the TwoAddressInstructions
2662
2.45k
    // pass. We want to avoid 128-bit copies as much as possible because they
2663
2.45k
    // can't be bundled by our scheduler.
2664
2.45k
    switch(NumVectorElts) {
2665
2.45k
    
case 2: RegClassID = R600::R600_Reg64RegClassID; break386
;
2666
2.45k
    case 4:
2667
2.07k
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2668
8
        RegClassID = R600::R600_Reg128VerticalRegClassID;
2669
2.06k
      else
2670
2.06k
        RegClassID = R600::R600_Reg128RegClassID;
2671
2.07k
      break;
2672
2.45k
    
default: 0
llvm_unreachable0
("Do not know how to lower this BUILD_VECTOR");
2673
2.45k
    }
2674
2.45k
    SelectBuildVector(N, RegClassID);
2675
2.45k
    return;
2676
2.45k
  }
2677
82.6k
  }
2678
82.6k
2679
82.6k
  SelectCode(N);
2680
82.6k
}
2681
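In Select above, BUILD_VECTOR, SCALAR_TO_VECTOR and BUILD_VERTICAL_VECTOR are the only opcodes handled by hand; the element count picks the register class (2 elements use the 64-bit class, 4 elements use a 128-bit class, with BUILD_VERTICAL_VECTOR getting its own vertical 128-bit class), and everything else falls through to the generated SelectCode. A standalone sketch of that class choice (the enum is illustrative, not the real R600 register-class IDs):

#include <stdexcept>

enum R600VecRegClass { Reg64, Reg128, Reg128Vertical };

static R600VecRegClass classForBuildVector(unsigned NumVectorElts,
                                           bool IsVerticalVector) {
  switch (NumVectorElts) {
  case 2:
    return Reg64;
  case 4:
    // BUILD_VERTICAL_VECTOR gets the dedicated vertical 128-bit class;
    // ordinary BUILD_VECTOR / SCALAR_TO_VECTOR use the plain one.
    return IsVerticalVector ? Reg128Vertical : Reg128;
  default:
    throw std::runtime_error("Do not know how to lower this BUILD_VECTOR");
  }
}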
2682
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2683
2.04k
                                          SDValue &Offset) {
2684
2.04k
  ConstantSDNode *C;
2685
2.04k
  SDLoc DL(Addr);
2686
2.04k
2687
2.04k
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2688
0
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2689
0
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2690
2.04k
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2691
2.04k
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2692
1.76k
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2693
1.76k
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2694
1.76k
  } else 
if (277
(277
Addr.getOpcode() == ISD::ADD277
||
Addr.getOpcode() == ISD::OR277
) &&
2695
277
            
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))0
) {
2696
0
    Base = Addr.getOperand(0);
2697
0
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2698
277
  } else {
2699
277
    Base = Addr;
2700
277
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2701
277
  }
2702
2.04k
2703
2.04k
  return true;
2704
2.04k
}
2705
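SelectADDRIndirect above accepts ISD::OR in the same position as ISD::ADD when folding a constant into the offset. That is a common selector idiom: when the base is known to have zeros in the bits the immediate occupies, (base | imm) and (base + imm) compute the same address, so an OR produced by earlier combines can be matched like an ADD. A tiny check of that identity with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  std::uint32_t Base = 0x100;   // low bits known to be zero
  std::uint32_t Imm = 0x7;      // offset that fits entirely in those low bits
  assert((Base | Imm) == (Base + Imm));
  return 0;
}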
2706
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2707
1.71k
                                          SDValue &Offset) {
2708
1.71k
  ConstantSDNode *IMMOffset;
2709
1.71k
2710
1.71k
  if (Addr.getOpcode() == ISD::ADD
2711
1.71k
      && 
(IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))592
2712
1.71k
      && 
isInt<16>(IMMOffset->getZExtValue())400
) {
2713
399
2714
399
      Base = Addr.getOperand(0);
2715
399
      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2716
399
                                         MVT::i32);
2717
399
      return true;
2718
399
  // If the pointer address is constant, we can move it to the offset field.
2719
1.31k
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2720
1.31k
             && 
isInt<16>(IMMOffset->getZExtValue())368
) {
2721
368
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2722
368
                                  SDLoc(CurDAG->getEntryNode()),
2723
368
                                  R600::ZERO, MVT::i32);
2724
368
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2725
368
                                       MVT::i32);
2726
368
    return true;
2727
368
  }
2728
948
2729
948
  // Default case, no offset
2730
948
  Base = Addr;
2731
948
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2732
948
  return true;
2733
948
}
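SelectADDRVTX_READ above tries three addressing forms in order: (base + constant) where the constant fits in a signed 16-bit immediate, a bare constant address (which becomes the ZERO register plus the constant, per the comment about moving a constant pointer into the offset field), and finally the whole address as the base with offset 0. Below is a simplified standalone model of that cascade, with a hypothetical Addr record standing in for the SDValue pattern checks.

#include <cstdint>
#include <limits>

// Hypothetical summary of the address node; the real code inspects SDValues.
struct Addr {
  bool IsAddOfConstant;   // Addr is (something + constant)
  bool IsPlainConstant;   // Addr is itself a constant
  std::int64_t Constant;  // the constant part, when present
};

struct VTXReadMatch {
  bool UseZeroBase;       // base is the ZERO register
  std::int64_t Offset;
};

static bool fitsInSInt16(std::int64_t V) {
  return V >= std::numeric_limits<std::int16_t>::min() &&
         V <= std::numeric_limits<std::int16_t>::max();
}

static VTXReadMatch matchVTXRead(const Addr &A) {
  if (A.IsAddOfConstant && fitsInSInt16(A.Constant))
    return {false, A.Constant};   // base = the add's other operand
  if (A.IsPlainConstant && fitsInSInt16(A.Constant))
    return {true, A.Constant};    // constant pointer moved into the offset
  return {false, 0};              // default case: base = Addr, no offset
}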