Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines an instruction selector for the NVPTX target.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "NVPTXISelDAGToDAG.h"
14
#include "NVPTXUtilities.h"
15
#include "MCTargetDesc/NVPTXBaseInfo.h"
16
#include "llvm/Analysis/ValueTracking.h"
17
#include "llvm/IR/GlobalValue.h"
18
#include "llvm/IR/Instructions.h"
19
#include "llvm/Support/AtomicOrdering.h"
20
#include "llvm/Support/CommandLine.h"
21
#include "llvm/Support/Debug.h"
22
#include "llvm/Support/ErrorHandling.h"
23
#include "llvm/Support/raw_ostream.h"
24
#include "llvm/Target/TargetIntrinsicInfo.h"
25
26
using namespace llvm;
27
28
#define DEBUG_TYPE "nvptx-isel"
29
30
/// createNVPTXISelDag - This pass converts a legalized DAG into a
31
/// NVPTX-specific DAG, ready for instruction scheduling.
32
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
33
263
                                       llvm::CodeGenOpt::Level OptLevel) {
34
263
  return new NVPTXDAGToDAGISel(TM, OptLevel);
35
263
}
36
37
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
38
                                     CodeGenOpt::Level OptLevel)
39
263
    : SelectionDAGISel(tm, OptLevel), TM(tm) {
40
263
  doMulWide = (OptLevel > 0);
41
263
}
42
43
1.69k
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
44
1.69k
  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
45
1.69k
  return SelectionDAGISel::runOnMachineFunction(MF);
46
1.69k
}
47
48
45
int NVPTXDAGToDAGISel::getDivF32Level() const {
49
45
  return Subtarget->getTargetLowering()->getDivF32Level();
50
45
}
51
52
2
bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
53
2
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
54
2
}
55
56
1.18k
bool NVPTXDAGToDAGISel::useF32FTZ() const {
57
1.18k
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
58
1.18k
}
59
60
673
bool NVPTXDAGToDAGISel::allowFMA() const {
61
673
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
62
673
  return TL->allowFMA(*MF, OptLevel);
63
673
}
64
65
24
bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
66
24
  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
67
24
  return TL->allowUnsafeFPMath(*MF);
68
24
}
69
70
20
bool NVPTXDAGToDAGISel::useShortPointers() const {
71
20
  return TM.useShortPointers();
72
20
}
73
74
/// Select - Select instructions not customized! Used for
75
/// expanded, promoted and normal instructions.
76
22.8k
void NVPTXDAGToDAGISel::Select(SDNode *N) {
77
22.8k
78
22.8k
  if (N->isMachineOpcode()) {
79
0
    N->setNodeId(-1);
80
0
    return; // Already selected.
81
0
  }
82
22.8k
83
22.8k
  switch (N->getOpcode()) {
84
22.8k
  case ISD::LOAD:
85
2.93k
  case ISD::ATOMIC_LOAD:
86
2.93k
    if (tryLoad(N))
87
2.93k
      return;
88
0
    break;
89
403
  case ISD::STORE:
90
403
  case ISD::ATOMIC_STORE:
91
403
    if (tryStore(N))
92
403
      return;
93
0
    break;
94
240
  case ISD::EXTRACT_VECTOR_ELT:
95
240
    if (tryEXTRACT_VECTOR_ELEMENT(N))
96
224
      return;
97
16
    break;
98
16
  case NVPTXISD::SETP_F16X2:
99
16
    SelectSETP_F16X2(N);
100
16
    return;
101
16
102
185
  case NVPTXISD::LoadV2:
103
185
  case NVPTXISD::LoadV4:
104
185
    if (tryLoadVector(N))
105
185
      return;
106
0
    break;
107
0
  case NVPTXISD::LDGV2:
108
0
  case NVPTXISD::LDGV4:
109
0
  case NVPTXISD::LDUV2:
110
0
  case NVPTXISD::LDUV4:
111
0
    if (tryLDGLDU(N))
112
0
      return;
113
0
    break;
114
59
  case NVPTXISD::StoreV2:
115
59
  case NVPTXISD::StoreV4:
116
59
    if (tryStoreVector(N))
117
59
      return;
118
0
    break;
119
158
  case NVPTXISD::LoadParam:
120
158
  case NVPTXISD::LoadParamV2:
121
158
  case NVPTXISD::LoadParamV4:
122
158
    if (tryLoadParam(N))
123
158
      return;
124
0
    break;
125
1.37k
  case NVPTXISD::StoreRetval:
126
1.37k
  case NVPTXISD::StoreRetvalV2:
127
1.37k
  case NVPTXISD::StoreRetvalV4:
128
1.37k
    if (tryStoreRetval(N))
129
1.37k
      return;
130
0
    break;
131
209
  case NVPTXISD::StoreParam:
132
209
  case NVPTXISD::StoreParamV2:
133
209
  case NVPTXISD::StoreParamV4:
134
209
  case NVPTXISD::StoreParamS32:
135
209
  case NVPTXISD::StoreParamU32:
136
209
    if (tryStoreParam(N))
137
209
      return;
138
0
    break;
139
192
  case ISD::INTRINSIC_WO_CHAIN:
140
192
    if (tryIntrinsicNoChain(N))
141
14
      return;
142
178
    break;
143
178
  case ISD::INTRINSIC_W_CHAIN:
144
174
    if (tryIntrinsicChain(N))
145
7
      return;
146
167
    break;
147
167
  case NVPTXISD::Tex1DFloatS32:
148
5
  case NVPTXISD::Tex1DFloatFloat:
149
5
  case NVPTXISD::Tex1DFloatFloatLevel:
150
5
  case NVPTXISD::Tex1DFloatFloatGrad:
151
5
  case NVPTXISD::Tex1DS32S32:
152
5
  case NVPTXISD::Tex1DS32Float:
153
5
  case NVPTXISD::Tex1DS32FloatLevel:
154
5
  case NVPTXISD::Tex1DS32FloatGrad:
155
5
  case NVPTXISD::Tex1DU32S32:
156
5
  case NVPTXISD::Tex1DU32Float:
157
5
  case NVPTXISD::Tex1DU32FloatLevel:
158
5
  case NVPTXISD::Tex1DU32FloatGrad:
159
5
  case NVPTXISD::Tex1DArrayFloatS32:
160
5
  case NVPTXISD::Tex1DArrayFloatFloat:
161
5
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
162
5
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
163
5
  case NVPTXISD::Tex1DArrayS32S32:
164
5
  case NVPTXISD::Tex1DArrayS32Float:
165
5
  case NVPTXISD::Tex1DArrayS32FloatLevel:
166
5
  case NVPTXISD::Tex1DArrayS32FloatGrad:
167
5
  case NVPTXISD::Tex1DArrayU32S32:
168
5
  case NVPTXISD::Tex1DArrayU32Float:
169
5
  case NVPTXISD::Tex1DArrayU32FloatLevel:
170
5
  case NVPTXISD::Tex1DArrayU32FloatGrad:
171
5
  case NVPTXISD::Tex2DFloatS32:
172
5
  case NVPTXISD::Tex2DFloatFloat:
173
5
  case NVPTXISD::Tex2DFloatFloatLevel:
174
5
  case NVPTXISD::Tex2DFloatFloatGrad:
175
5
  case NVPTXISD::Tex2DS32S32:
176
5
  case NVPTXISD::Tex2DS32Float:
177
5
  case NVPTXISD::Tex2DS32FloatLevel:
178
5
  case NVPTXISD::Tex2DS32FloatGrad:
179
5
  case NVPTXISD::Tex2DU32S32:
180
5
  case NVPTXISD::Tex2DU32Float:
181
5
  case NVPTXISD::Tex2DU32FloatLevel:
182
5
  case NVPTXISD::Tex2DU32FloatGrad:
183
5
  case NVPTXISD::Tex2DArrayFloatS32:
184
5
  case NVPTXISD::Tex2DArrayFloatFloat:
185
5
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
186
5
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
187
5
  case NVPTXISD::Tex2DArrayS32S32:
188
5
  case NVPTXISD::Tex2DArrayS32Float:
189
5
  case NVPTXISD::Tex2DArrayS32FloatLevel:
190
5
  case NVPTXISD::Tex2DArrayS32FloatGrad:
191
5
  case NVPTXISD::Tex2DArrayU32S32:
192
5
  case NVPTXISD::Tex2DArrayU32Float:
193
5
  case NVPTXISD::Tex2DArrayU32FloatLevel:
194
5
  case NVPTXISD::Tex2DArrayU32FloatGrad:
195
5
  case NVPTXISD::Tex3DFloatS32:
196
5
  case NVPTXISD::Tex3DFloatFloat:
197
5
  case NVPTXISD::Tex3DFloatFloatLevel:
198
5
  case NVPTXISD::Tex3DFloatFloatGrad:
199
5
  case NVPTXISD::Tex3DS32S32:
200
5
  case NVPTXISD::Tex3DS32Float:
201
5
  case NVPTXISD::Tex3DS32FloatLevel:
202
5
  case NVPTXISD::Tex3DS32FloatGrad:
203
5
  case NVPTXISD::Tex3DU32S32:
204
5
  case NVPTXISD::Tex3DU32Float:
205
5
  case NVPTXISD::Tex3DU32FloatLevel:
206
5
  case NVPTXISD::Tex3DU32FloatGrad:
207
5
  case NVPTXISD::TexCubeFloatFloat:
208
5
  case NVPTXISD::TexCubeFloatFloatLevel:
209
5
  case NVPTXISD::TexCubeS32Float:
210
5
  case NVPTXISD::TexCubeS32FloatLevel:
211
5
  case NVPTXISD::TexCubeU32Float:
212
5
  case NVPTXISD::TexCubeU32FloatLevel:
213
5
  case NVPTXISD::TexCubeArrayFloatFloat:
214
5
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
215
5
  case NVPTXISD::TexCubeArrayS32Float:
216
5
  case NVPTXISD::TexCubeArrayS32FloatLevel:
217
5
  case NVPTXISD::TexCubeArrayU32Float:
218
5
  case NVPTXISD::TexCubeArrayU32FloatLevel:
219
5
  case NVPTXISD::Tld4R2DFloatFloat:
220
5
  case NVPTXISD::Tld4G2DFloatFloat:
221
5
  case NVPTXISD::Tld4B2DFloatFloat:
222
5
  case NVPTXISD::Tld4A2DFloatFloat:
223
5
  case NVPTXISD::Tld4R2DS64Float:
224
5
  case NVPTXISD::Tld4G2DS64Float:
225
5
  case NVPTXISD::Tld4B2DS64Float:
226
5
  case NVPTXISD::Tld4A2DS64Float:
227
5
  case NVPTXISD::Tld4R2DU64Float:
228
5
  case NVPTXISD::Tld4G2DU64Float:
229
5
  case NVPTXISD::Tld4B2DU64Float:
230
5
  case NVPTXISD::Tld4A2DU64Float:
231
5
  case NVPTXISD::TexUnified1DFloatS32:
232
5
  case NVPTXISD::TexUnified1DFloatFloat:
233
5
  case NVPTXISD::TexUnified1DFloatFloatLevel:
234
5
  case NVPTXISD::TexUnified1DFloatFloatGrad:
235
5
  case NVPTXISD::TexUnified1DS32S32:
236
5
  case NVPTXISD::TexUnified1DS32Float:
237
5
  case NVPTXISD::TexUnified1DS32FloatLevel:
238
5
  case NVPTXISD::TexUnified1DS32FloatGrad:
239
5
  case NVPTXISD::TexUnified1DU32S32:
240
5
  case NVPTXISD::TexUnified1DU32Float:
241
5
  case NVPTXISD::TexUnified1DU32FloatLevel:
242
5
  case NVPTXISD::TexUnified1DU32FloatGrad:
243
5
  case NVPTXISD::TexUnified1DArrayFloatS32:
244
5
  case NVPTXISD::TexUnified1DArrayFloatFloat:
245
5
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
246
5
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
247
5
  case NVPTXISD::TexUnified1DArrayS32S32:
248
5
  case NVPTXISD::TexUnified1DArrayS32Float:
249
5
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
250
5
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
251
5
  case NVPTXISD::TexUnified1DArrayU32S32:
252
5
  case NVPTXISD::TexUnified1DArrayU32Float:
253
5
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
254
5
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
255
5
  case NVPTXISD::TexUnified2DFloatS32:
256
5
  case NVPTXISD::TexUnified2DFloatFloat:
257
5
  case NVPTXISD::TexUnified2DFloatFloatLevel:
258
5
  case NVPTXISD::TexUnified2DFloatFloatGrad:
259
5
  case NVPTXISD::TexUnified2DS32S32:
260
5
  case NVPTXISD::TexUnified2DS32Float:
261
5
  case NVPTXISD::TexUnified2DS32FloatLevel:
262
5
  case NVPTXISD::TexUnified2DS32FloatGrad:
263
5
  case NVPTXISD::TexUnified2DU32S32:
264
5
  case NVPTXISD::TexUnified2DU32Float:
265
5
  case NVPTXISD::TexUnified2DU32FloatLevel:
266
5
  case NVPTXISD::TexUnified2DU32FloatGrad:
267
5
  case NVPTXISD::TexUnified2DArrayFloatS32:
268
5
  case NVPTXISD::TexUnified2DArrayFloatFloat:
269
5
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
270
5
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
271
5
  case NVPTXISD::TexUnified2DArrayS32S32:
272
5
  case NVPTXISD::TexUnified2DArrayS32Float:
273
5
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
274
5
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
275
5
  case NVPTXISD::TexUnified2DArrayU32S32:
276
5
  case NVPTXISD::TexUnified2DArrayU32Float:
277
5
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
278
5
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
279
5
  case NVPTXISD::TexUnified3DFloatS32:
280
5
  case NVPTXISD::TexUnified3DFloatFloat:
281
5
  case NVPTXISD::TexUnified3DFloatFloatLevel:
282
5
  case NVPTXISD::TexUnified3DFloatFloatGrad:
283
5
  case NVPTXISD::TexUnified3DS32S32:
284
5
  case NVPTXISD::TexUnified3DS32Float:
285
5
  case NVPTXISD::TexUnified3DS32FloatLevel:
286
5
  case NVPTXISD::TexUnified3DS32FloatGrad:
287
5
  case NVPTXISD::TexUnified3DU32S32:
288
5
  case NVPTXISD::TexUnified3DU32Float:
289
5
  case NVPTXISD::TexUnified3DU32FloatLevel:
290
5
  case NVPTXISD::TexUnified3DU32FloatGrad:
291
5
  case NVPTXISD::TexUnifiedCubeFloatFloat:
292
5
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
293
5
  case NVPTXISD::TexUnifiedCubeS32Float:
294
5
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
295
5
  case NVPTXISD::TexUnifiedCubeU32Float:
296
5
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
297
5
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
298
5
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
299
5
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
300
5
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
301
5
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
302
5
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
303
5
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
304
5
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
305
5
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
306
5
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
307
5
  case NVPTXISD::Tld4UnifiedR2DS64Float:
308
5
  case NVPTXISD::Tld4UnifiedG2DS64Float:
309
5
  case NVPTXISD::Tld4UnifiedB2DS64Float:
310
5
  case NVPTXISD::Tld4UnifiedA2DS64Float:
311
5
  case NVPTXISD::Tld4UnifiedR2DU64Float:
312
5
  case NVPTXISD::Tld4UnifiedG2DU64Float:
313
5
  case NVPTXISD::Tld4UnifiedB2DU64Float:
314
5
  case NVPTXISD::Tld4UnifiedA2DU64Float:
315
5
    if (tryTextureIntrinsic(N))
316
5
      return;
317
0
    break;
318
5
  case NVPTXISD::Suld1DI8Clamp:
319
5
  case NVPTXISD::Suld1DI16Clamp:
320
5
  case NVPTXISD::Suld1DI32Clamp:
321
5
  case NVPTXISD::Suld1DI64Clamp:
322
5
  case NVPTXISD::Suld1DV2I8Clamp:
323
5
  case NVPTXISD::Suld1DV2I16Clamp:
324
5
  case NVPTXISD::Suld1DV2I32Clamp:
325
5
  case NVPTXISD::Suld1DV2I64Clamp:
326
5
  case NVPTXISD::Suld1DV4I8Clamp:
327
5
  case NVPTXISD::Suld1DV4I16Clamp:
328
5
  case NVPTXISD::Suld1DV4I32Clamp:
329
5
  case NVPTXISD::Suld1DArrayI8Clamp:
330
5
  case NVPTXISD::Suld1DArrayI16Clamp:
331
5
  case NVPTXISD::Suld1DArrayI32Clamp:
332
5
  case NVPTXISD::Suld1DArrayI64Clamp:
333
5
  case NVPTXISD::Suld1DArrayV2I8Clamp:
334
5
  case NVPTXISD::Suld1DArrayV2I16Clamp:
335
5
  case NVPTXISD::Suld1DArrayV2I32Clamp:
336
5
  case NVPTXISD::Suld1DArrayV2I64Clamp:
337
5
  case NVPTXISD::Suld1DArrayV4I8Clamp:
338
5
  case NVPTXISD::Suld1DArrayV4I16Clamp:
339
5
  case NVPTXISD::Suld1DArrayV4I32Clamp:
340
5
  case NVPTXISD::Suld2DI8Clamp:
341
5
  case NVPTXISD::Suld2DI16Clamp:
342
5
  case NVPTXISD::Suld2DI32Clamp:
343
5
  case NVPTXISD::Suld2DI64Clamp:
344
5
  case NVPTXISD::Suld2DV2I8Clamp:
345
5
  case NVPTXISD::Suld2DV2I16Clamp:
346
5
  case NVPTXISD::Suld2DV2I32Clamp:
347
5
  case NVPTXISD::Suld2DV2I64Clamp:
348
5
  case NVPTXISD::Suld2DV4I8Clamp:
349
5
  case NVPTXISD::Suld2DV4I16Clamp:
350
5
  case NVPTXISD::Suld2DV4I32Clamp:
351
5
  case NVPTXISD::Suld2DArrayI8Clamp:
352
5
  case NVPTXISD::Suld2DArrayI16Clamp:
353
5
  case NVPTXISD::Suld2DArrayI32Clamp:
354
5
  case NVPTXISD::Suld2DArrayI64Clamp:
355
5
  case NVPTXISD::Suld2DArrayV2I8Clamp:
356
5
  case NVPTXISD::Suld2DArrayV2I16Clamp:
357
5
  case NVPTXISD::Suld2DArrayV2I32Clamp:
358
5
  case NVPTXISD::Suld2DArrayV2I64Clamp:
359
5
  case NVPTXISD::Suld2DArrayV4I8Clamp:
360
5
  case NVPTXISD::Suld2DArrayV4I16Clamp:
361
5
  case NVPTXISD::Suld2DArrayV4I32Clamp:
362
5
  case NVPTXISD::Suld3DI8Clamp:
363
5
  case NVPTXISD::Suld3DI16Clamp:
364
5
  case NVPTXISD::Suld3DI32Clamp:
365
5
  case NVPTXISD::Suld3DI64Clamp:
366
5
  case NVPTXISD::Suld3DV2I8Clamp:
367
5
  case NVPTXISD::Suld3DV2I16Clamp:
368
5
  case NVPTXISD::Suld3DV2I32Clamp:
369
5
  case NVPTXISD::Suld3DV2I64Clamp:
370
5
  case NVPTXISD::Suld3DV4I8Clamp:
371
5
  case NVPTXISD::Suld3DV4I16Clamp:
372
5
  case NVPTXISD::Suld3DV4I32Clamp:
373
5
  case NVPTXISD::Suld1DI8Trap:
374
5
  case NVPTXISD::Suld1DI16Trap:
375
5
  case NVPTXISD::Suld1DI32Trap:
376
5
  case NVPTXISD::Suld1DI64Trap:
377
5
  case NVPTXISD::Suld1DV2I8Trap:
378
5
  case NVPTXISD::Suld1DV2I16Trap:
379
5
  case NVPTXISD::Suld1DV2I32Trap:
380
5
  case NVPTXISD::Suld1DV2I64Trap:
381
5
  case NVPTXISD::Suld1DV4I8Trap:
382
5
  case NVPTXISD::Suld1DV4I16Trap:
383
5
  case NVPTXISD::Suld1DV4I32Trap:
384
5
  case NVPTXISD::Suld1DArrayI8Trap:
385
5
  case NVPTXISD::Suld1DArrayI16Trap:
386
5
  case NVPTXISD::Suld1DArrayI32Trap:
387
5
  case NVPTXISD::Suld1DArrayI64Trap:
388
5
  case NVPTXISD::Suld1DArrayV2I8Trap:
389
5
  case NVPTXISD::Suld1DArrayV2I16Trap:
390
5
  case NVPTXISD::Suld1DArrayV2I32Trap:
391
5
  case NVPTXISD::Suld1DArrayV2I64Trap:
392
5
  case NVPTXISD::Suld1DArrayV4I8Trap:
393
5
  case NVPTXISD::Suld1DArrayV4I16Trap:
394
5
  case NVPTXISD::Suld1DArrayV4I32Trap:
395
5
  case NVPTXISD::Suld2DI8Trap:
396
5
  case NVPTXISD::Suld2DI16Trap:
397
5
  case NVPTXISD::Suld2DI32Trap:
398
5
  case NVPTXISD::Suld2DI64Trap:
399
5
  case NVPTXISD::Suld2DV2I8Trap:
400
5
  case NVPTXISD::Suld2DV2I16Trap:
401
5
  case NVPTXISD::Suld2DV2I32Trap:
402
5
  case NVPTXISD::Suld2DV2I64Trap:
403
5
  case NVPTXISD::Suld2DV4I8Trap:
404
5
  case NVPTXISD::Suld2DV4I16Trap:
405
5
  case NVPTXISD::Suld2DV4I32Trap:
406
5
  case NVPTXISD::Suld2DArrayI8Trap:
407
5
  case NVPTXISD::Suld2DArrayI16Trap:
408
5
  case NVPTXISD::Suld2DArrayI32Trap:
409
5
  case NVPTXISD::Suld2DArrayI64Trap:
410
5
  case NVPTXISD::Suld2DArrayV2I8Trap:
411
5
  case NVPTXISD::Suld2DArrayV2I16Trap:
412
5
  case NVPTXISD::Suld2DArrayV2I32Trap:
413
5
  case NVPTXISD::Suld2DArrayV2I64Trap:
414
5
  case NVPTXISD::Suld2DArrayV4I8Trap:
415
5
  case NVPTXISD::Suld2DArrayV4I16Trap:
416
5
  case NVPTXISD::Suld2DArrayV4I32Trap:
417
5
  case NVPTXISD::Suld3DI8Trap:
418
5
  case NVPTXISD::Suld3DI16Trap:
419
5
  case NVPTXISD::Suld3DI32Trap:
420
5
  case NVPTXISD::Suld3DI64Trap:
421
5
  case NVPTXISD::Suld3DV2I8Trap:
422
5
  case NVPTXISD::Suld3DV2I16Trap:
423
5
  case NVPTXISD::Suld3DV2I32Trap:
424
5
  case NVPTXISD::Suld3DV2I64Trap:
425
5
  case NVPTXISD::Suld3DV4I8Trap:
426
5
  case NVPTXISD::Suld3DV4I16Trap:
427
5
  case NVPTXISD::Suld3DV4I32Trap:
428
5
  case NVPTXISD::Suld1DI8Zero:
429
5
  case NVPTXISD::Suld1DI16Zero:
430
5
  case NVPTXISD::Suld1DI32Zero:
431
5
  case NVPTXISD::Suld1DI64Zero:
432
5
  case NVPTXISD::Suld1DV2I8Zero:
433
5
  case NVPTXISD::Suld1DV2I16Zero:
434
5
  case NVPTXISD::Suld1DV2I32Zero:
435
5
  case NVPTXISD::Suld1DV2I64Zero:
436
5
  case NVPTXISD::Suld1DV4I8Zero:
437
5
  case NVPTXISD::Suld1DV4I16Zero:
438
5
  case NVPTXISD::Suld1DV4I32Zero:
439
5
  case NVPTXISD::Suld1DArrayI8Zero:
440
5
  case NVPTXISD::Suld1DArrayI16Zero:
441
5
  case NVPTXISD::Suld1DArrayI32Zero:
442
5
  case NVPTXISD::Suld1DArrayI64Zero:
443
5
  case NVPTXISD::Suld1DArrayV2I8Zero:
444
5
  case NVPTXISD::Suld1DArrayV2I16Zero:
445
5
  case NVPTXISD::Suld1DArrayV2I32Zero:
446
5
  case NVPTXISD::Suld1DArrayV2I64Zero:
447
5
  case NVPTXISD::Suld1DArrayV4I8Zero:
448
5
  case NVPTXISD::Suld1DArrayV4I16Zero:
449
5
  case NVPTXISD::Suld1DArrayV4I32Zero:
450
5
  case NVPTXISD::Suld2DI8Zero:
451
5
  case NVPTXISD::Suld2DI16Zero:
452
5
  case NVPTXISD::Suld2DI32Zero:
453
5
  case NVPTXISD::Suld2DI64Zero:
454
5
  case NVPTXISD::Suld2DV2I8Zero:
455
5
  case NVPTXISD::Suld2DV2I16Zero:
456
5
  case NVPTXISD::Suld2DV2I32Zero:
457
5
  case NVPTXISD::Suld2DV2I64Zero:
458
5
  case NVPTXISD::Suld2DV4I8Zero:
459
5
  case NVPTXISD::Suld2DV4I16Zero:
460
5
  case NVPTXISD::Suld2DV4I32Zero:
461
5
  case NVPTXISD::Suld2DArrayI8Zero:
462
5
  case NVPTXISD::Suld2DArrayI16Zero:
463
5
  case NVPTXISD::Suld2DArrayI32Zero:
464
5
  case NVPTXISD::Suld2DArrayI64Zero:
465
5
  case NVPTXISD::Suld2DArrayV2I8Zero:
466
5
  case NVPTXISD::Suld2DArrayV2I16Zero:
467
5
  case NVPTXISD::Suld2DArrayV2I32Zero:
468
5
  case NVPTXISD::Suld2DArrayV2I64Zero:
469
5
  case NVPTXISD::Suld2DArrayV4I8Zero:
470
5
  case NVPTXISD::Suld2DArrayV4I16Zero:
471
5
  case NVPTXISD::Suld2DArrayV4I32Zero:
472
5
  case NVPTXISD::Suld3DI8Zero:
473
5
  case NVPTXISD::Suld3DI16Zero:
474
5
  case NVPTXISD::Suld3DI32Zero:
475
5
  case NVPTXISD::Suld3DI64Zero:
476
5
  case NVPTXISD::Suld3DV2I8Zero:
477
5
  case NVPTXISD::Suld3DV2I16Zero:
478
5
  case NVPTXISD::Suld3DV2I32Zero:
479
5
  case NVPTXISD::Suld3DV2I64Zero:
480
5
  case NVPTXISD::Suld3DV4I8Zero:
481
5
  case NVPTXISD::Suld3DV4I16Zero:
482
5
  case NVPTXISD::Suld3DV4I32Zero:
483
5
    if (trySurfaceIntrinsic(N))
484
5
      return;
485
0
    break;
486
240
  case ISD::AND:
487
240
  case ISD::SRA:
488
240
  case ISD::SRL:
489
240
    // Try to select BFE
490
240
    if (tryBFE(N))
491
7
      return;
492
233
    break;
493
233
  case ISD::ADDRSPACECAST:
494
166
    SelectAddrSpaceCast(N);
495
166
    return;
496
233
  case ISD::ConstantFP:
497
20
    if (tryConstantFP16(N))
498
10
      return;
499
10
    break;
500
16.4k
  default:
501
16.4k
    break;
502
17.0k
  }
503
17.0k
  SelectCode(N);
504
17.0k
}
505
506
174
bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
507
174
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
508
174
  switch (IID) {
509
174
  default:
510
167
    return false;
511
174
  case Intrinsic::nvvm_ldg_global_f:
512
7
  case Intrinsic::nvvm_ldg_global_i:
513
7
  case Intrinsic::nvvm_ldg_global_p:
514
7
  case Intrinsic::nvvm_ldu_global_f:
515
7
  case Intrinsic::nvvm_ldu_global_i:
516
7
  case Intrinsic::nvvm_ldu_global_p:
517
7
    return tryLDGLDU(N);
518
174
  }
519
174
}
520
521
// There's no way to specify FP16 immediates in .f16 ops, so we have to
522
// load them into an .f16 register first.
523
20
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
524
20
  if (N->getValueType(0) != MVT::f16)
525
10
    return false;
526
10
  SDValue Val = CurDAG->getTargetConstantFP(
527
10
      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
528
10
  SDNode *LoadConstF16 =
529
10
      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
530
10
  ReplaceNode(N, LoadConstF16);
531
10
  return true;
532
10
}
533
534
// Map ISD::CONDCODE value to appropriate CmpMode expected by
535
// NVPTXInstPrinter::printCmpMode()
536
16
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
537
16
  using NVPTX::PTXCmpMode::CmpMode;
538
16
  unsigned PTXCmpMode = [](ISD::CondCode CC) {
539
16
    switch (CC) {
540
16
    default:
541
0
      llvm_unreachable("Unexpected condition code.");
542
16
    case ISD::SETOEQ:
543
1
      return CmpMode::EQ;
544
16
    case ISD::SETOGT:
545
1
      return CmpMode::GT;
546
16
    case ISD::SETOGE:
547
1
      return CmpMode::GE;
548
16
    case ISD::SETOLT:
549
1
      return CmpMode::LT;
550
16
    case ISD::SETOLE:
551
1
      return CmpMode::LE;
552
16
    case ISD::SETONE:
553
1
      return CmpMode::NE;
554
16
    case ISD::SETO:
555
1
      return CmpMode::NUM;
556
16
    case ISD::SETUO:
557
1
      return CmpMode::NotANumber;
558
16
    case ISD::SETUEQ:
559
1
      return CmpMode::EQU;
560
16
    case ISD::SETUGT:
561
1
      return CmpMode::GTU;
562
16
    case ISD::SETUGE:
563
1
      return CmpMode::GEU;
564
16
    case ISD::SETULT:
565
1
      return CmpMode::LTU;
566
16
    case ISD::SETULE:
567
1
      return CmpMode::LEU;
568
16
    case ISD::SETUNE:
569
3
      return CmpMode::NEU;
570
16
    case ISD::SETEQ:
571
0
      return CmpMode::EQ;
572
16
    case ISD::SETGT:
573
0
      return CmpMode::GT;
574
16
    case ISD::SETGE:
575
0
      return CmpMode::GE;
576
16
    case ISD::SETLT:
577
0
      return CmpMode::LT;
578
16
    case ISD::SETLE:
579
0
      return CmpMode::LE;
580
16
    case ISD::SETNE:
581
0
      return CmpMode::NE;
582
16
    }
583
16
  }(CondCode.get());
584
16
585
16
  if (FTZ)
586
0
    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
587
16
588
16
  return PTXCmpMode;
589
16
}
590
591
16
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
592
16
  unsigned PTXCmpMode =
593
16
      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
594
16
  SDLoc DL(N);
595
16
  SDNode *SetP = CurDAG->getMachineNode(
596
16
      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
597
16
      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
598
16
  ReplaceNode(N, SetP);
599
16
  return true;
600
16
}
601
602
// Find all instances of extract_vector_elt that use this v2f16 vector
603
// and coalesce them into a scattering move instruction.
604
240
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
605
240
  SDValue Vector = N->getOperand(0);
606
240
607
240
  // We only care about f16x2 as it's the only real vector type we
608
240
  // need to deal with.
609
240
  if (Vector.getSimpleValueType() != MVT::v2f16)
610
0
    return false;
611
240
612
240
  // Find and record all uses of this vector that extract element 0 or 1.
613
240
  SmallVector<SDNode *, 4> E0, E1;
614
694
  for (const auto &U : Vector.getNode()->uses()) {
615
694
    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
616
224
      continue;
617
470
    if (U->getOperand(0) != Vector)
618
6
      continue;
619
464
    if (const ConstantSDNode *IdxConst =
620
464
            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
621
464
      if (IdxConst->getZExtValue() == 0)
622
237
        E0.push_back(U);
623
227
      else if (IdxConst->getZExtValue() == 1)
624
227
        E1.push_back(U);
625
227
      else
626
227
        llvm_unreachable("Invalid vector index."); // region count for 'llvm_unreachable': 0
627
464
    }
628
464
  }
629
240
630
240
  // There's no point scattering f16x2 if we only ever access one
631
240
  // element of it.
632
240
  if (E0.empty() || E1.empty()) // region count for 'E1.empty()': 237
633
16
    return false;
634
224
635
224
  unsigned Op = NVPTX::SplitF16x2;
636
224
  // If the vector has been BITCAST'ed from i32, we can use original
637
224
  // value directly and avoid register-to-register move.
638
224
  SDValue Source = Vector;
639
224
  if (Vector->getOpcode() == ISD::BITCAST) {
640
0
    Op = NVPTX::SplitI32toF16x2;
641
0
    Source = Vector->getOperand(0);
642
0
  }
643
224
  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
644
224
  // into f16,f16 SplitF16x2(V)
645
224
  SDNode *ScatterOp =
646
224
      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
647
224
  for (auto *Node : E0)
648
224
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
649
224
  for (auto *Node : E1)
650
224
    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
651
224
652
224
  return true;
653
224
}
654
655
3.57k
static unsigned int getCodeAddrSpace(MemSDNode *N) {
656
3.57k
  const Value *Src = N->getMemOperand()->getValue();
657
3.57k
658
3.57k
  if (!Src)
659
1
    return NVPTX::PTXLdStInstCode::GENERIC;
660
3.57k
661
3.57k
  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
662
3.57k
    switch (PT->getAddressSpace()) {
663
3.57k
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; // region count: 54
664
3.57k
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; // region count: 242
665
3.57k
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; // region count: 88
666
3.57k
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; // region count: 486
667
3.57k
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; // region count: 2.70k
668
3.57k
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; // region count: 4
669
3.57k
    default: break; // region count: 0
670
0
    }
671
0
  }
672
0
  return NVPTX::PTXLdStInstCode::GENERIC;
673
0
}
674
675
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
676
3.11k
                          unsigned CodeAddrSpace, MachineFunction *F) {
677
3.11k
  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
678
3.11k
  // space.
679
3.11k
  //
680
3.11k
  // We have two ways of identifying invariant loads: Loads may be explicitly
681
3.11k
  // marked as invariant, or we may infer them to be invariant.
682
3.11k
  //
683
3.11k
  // We currently infer invariance for loads from
684
3.11k
  //  - constant global variables, and
685
3.11k
  //  - kernel function pointer params that are noalias (i.e. __restrict) and
686
3.11k
  //    never written to.
687
3.11k
  //
688
3.11k
  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
689
3.11k
  // not during the SelectionDAG phase).
690
3.11k
  //
691
3.11k
  // TODO: Infer invariance only at -O2.  We still want to use ldg at -O0 for
692
3.11k
  // explicitly invariant loads because these are how clang tells us to use ldg
693
3.11k
  // when the user uses a builtin.
694
3.11k
  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) // region count for 'CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL': 1.28k
695
3.07k
    return false;
696
42
697
42
  if (N->isInvariant())
698
6
    return true;
699
36
700
36
  bool IsKernelFn = isKernelFunction(F->getFunction());
701
36
702
36
  // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
703
36
  // because the former looks through phi nodes while the latter does not. We
704
36
  // need to look through phi nodes to handle pointer induction variables.
705
36
  SmallVector<const Value *, 8> Objs;
706
36
  GetUnderlyingObjects(N->getMemOperand()->getValue(),
707
36
                       Objs, F->getDataLayout());
708
36
709
36
  return all_of(Objs, [&](const Value *V) {
710
36
    if (auto *A = dyn_cast<const Argument>(V))
711
33
      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr(); // region counts: 'A->onlyReadsMemory()': 31, 'A->hasNoAliasAttr()': 26
712
3
    if (auto *GV = dyn_cast<const GlobalVariable>(V))
713
3
      return GV->isConstant();
714
0
    return false;
715
0
  });
716
36
}
717
718
192
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
719
192
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
720
192
  switch (IID) {
721
192
  default:
722
178
    return false;
723
192
  case Intrinsic::nvvm_texsurf_handle_internal:
724
14
    SelectTexSurfHandle(N);
725
14
    return true;
726
192
  }
727
192
}
728
729
14
void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
730
14
  // Op 0 is the intrinsic ID
731
14
  SDValue Wrapper = N->getOperand(1);
732
14
  SDValue GlobalVal = Wrapper.getOperand(0);
733
14
  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
734
14
                                        MVT::i64, GlobalVal));
735
14
}
736
737
166
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
738
166
  SDValue Src = N->getOperand(0);
739
166
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
740
166
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
741
166
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
742
166
  assert(SrcAddrSpace != DstAddrSpace &&
743
166
         "addrspacecast must be between different address spaces");
744
166
745
166
  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
746
22
    // Specific to generic
747
22
    unsigned Opc;
748
22
    switch (SrcAddrSpace) {
749
22
    default: report_fatal_error("Bad address space in addrspacecast"); // region count: 0
750
22
    case ADDRESS_SPACE_GLOBAL:
751
11
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; // region counts: 'NVPTX::cvta_global_yes_64': 4, 'NVPTX::cvta_global_yes': 7
752
11
      break;
753
22
    case ADDRESS_SPACE_SHARED:
754
5
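      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432 // region counts: '(useShortPointers() ...': 3, 'NVPTX::cvta_shared_yes_6432': 1
755
3
                                               : NVPTX::cvta_shared_yes_64) // region count for 'NVPTX::cvta_shared_yes_64': 2
756
5
                         : NVPTX::cvta_shared_yes; // region count for 'NVPTX::cvta_shared_yes': 2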
757
5
      break;
758
22
    case ADDRESS_SPACE_CONST:
759
3
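      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432 // region counts: '(useShortPointers() ...': 2, 'NVPTX::cvta_const_yes_6432': 1
760
2
                                               : NVPTX::cvta_const_yes_64) // region count for 'NVPTX::cvta_const_yes_64': 1
761
3
                         : NVPTX::cvta_const_yes; // region count for 'NVPTX::cvta_const_yes': 1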
762
3
      break;
763
22
    case ADDRESS_SPACE_LOCAL:
764
3
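      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432 // region counts: '(useShortPointers() ...': 2, 'NVPTX::cvta_local_yes_6432': 1
765
2
                                               : NVPTX::cvta_local_yes_64) // region count for 'NVPTX::cvta_local_yes_64': 1
766
3
                         : NVPTX::cvta_local_yes; // region count for 'NVPTX::cvta_local_yes': 1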
767
3
      break;
768
22
    }
769
22
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
770
22
                                          Src));
771
22
    return;
772
144
  } else {
773
144
    // Generic to specific
774
144
    if (SrcAddrSpace != 0)
775
0
      report_fatal_error("Cannot cast between two non-generic address spaces");
776
144
    unsigned Opc;
777
144
    switch (DstAddrSpace) {
778
144
    default: report_fatal_error("Bad address space in addrspacecast"); // region count: 0
779
144
    case ADDRESS_SPACE_GLOBAL:
780
123
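      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 // region count for 'NVPTX::cvta_to_global_yes_64': 93
781
123
                         : NVPTX::cvta_to_global_yes; // region count for 'NVPTX::cvta_to_global_yes': 30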
782
123
      break;
783
144
    case ADDRESS_SPACE_SHARED:
784
3
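      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264 // region counts: '(useShortPointers() ...': 2, 'NVPTX::cvta_to_shared_yes_3264': 1
785
2
                                                : NVPTX::cvta_to_shared_yes_64) // region count for 'NVPTX::cvta_to_shared_yes_64': 1
786
3
                         : NVPTX::cvta_to_shared_yes; // region count for 'NVPTX::cvta_to_shared_yes': 1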
787
3
      break;
788
144
    case ADDRESS_SPACE_CONST:
789
3
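      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264 // region counts: '(useShortPointers() ...': 2, 'NVPTX::cvta_to_const_yes_3264': 1
790
2
                                             : NVPTX::cvta_to_const_yes_64) // region count for 'NVPTX::cvta_to_const_yes_64': 1
791
3
                         : NVPTX::cvta_to_const_yes; // region count for 'NVPTX::cvta_to_const_yes': 1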
792
3
      break;
793
144
    case ADDRESS_SPACE_LOCAL:
794
15
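      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264 // region counts: '(useShortPointers() ...': 9, 'NVPTX::cvta_to_local_yes_3264': 1
795
9
                                               : NVPTX::cvta_to_local_yes_64) // region count for 'NVPTX::cvta_to_local_yes_64': 8
796
15
                         : NVPTX::cvta_to_local_yes; // region count for 'NVPTX::cvta_to_local_yes': 6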
797
15
      break;
798
144
    case ADDRESS_SPACE_PARAM:
799
0
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800
0
                         : NVPTX::nvvm_ptr_gen_to_param;
801
0
      break;
802
144
    }
803
144
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
804
144
                                          Src));
805
144
    return;
806
144
  }
807
166
}
808
809
// Helper function template to reduce amount of boilerplate code for
810
// opcode selection.
811
static Optional<unsigned> pickOpcodeForVT(
812
    MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
813
    unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
814
5.32k
    unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
815
5.32k
  switch (VT) {
816
5.32k
  case MVT::i1:
817
96
  case MVT::i8:
818
96
    return Opcode_i8;
819
443
  case MVT::i16:
820
443
    return Opcode_i16;
821
1.70k
  case MVT::i32:
822
1.70k
    return Opcode_i32;
823
922
  case MVT::i64:
824
922
    return Opcode_i64;
825
704
  case MVT::f16:
826
704
    return Opcode_f16;
827
464
  case MVT::v2f16:
828
464
    return Opcode_f16x2;
829
725
  case MVT::f32:
830
725
    return Opcode_f32;
831
262
  case MVT::f64:
832
262
    return Opcode_f64;
833
96
  default:
834
0
    return None;
835
5.32k
  }
836
5.32k
}
837
838
2.93k
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
839
2.93k
  SDLoc dl(N);
840
2.93k
  MemSDNode *LD = cast<MemSDNode>(N);
841
2.93k
  assert(LD->readMem() && "Expected load");
842
2.93k
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
843
2.93k
  EVT LoadedVT = LD->getMemoryVT();
844
2.93k
  SDNode *NVPTXLD = nullptr;
845
2.93k
846
2.93k
  // do not support pre/post inc/dec
847
2.93k
  if (PlainLoad && 
PlainLoad->isIndexed()2.92k
)
848
0
    return false;
849
2.93k
850
2.93k
  if (!LoadedVT.isSimple())
851
0
    return false;
852
2.93k
853
2.93k
  AtomicOrdering Ordering = LD->getOrdering();
854
2.93k
  // In order to lower atomic loads with stronger guarantees we would need to
855
2.93k
  // use load.acquire or insert fences. However these features were only added
856
2.93k
  // with PTX ISA 6.0 / sm_70.
857
2.93k
  // TODO: Check if we can actually use the new instructions and implement them.
858
2.93k
  if (isStrongerThanMonotonic(Ordering))
859
0
    return false;
860
2.93k
861
2.93k
  // Address Space Setting
862
2.93k
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
863
2.93k
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
864
15
    return tryLDGLDU(N);
865
15
  }
866
2.91k
867
2.91k
  unsigned int PointerSize =
868
2.91k
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
869
2.91k
870
2.91k
  // Volatile Setting
871
2.91k
  // - .volatile is only available for .global and .shared
872
2.91k
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
873
2.91k
  bool isVolatile = LD->isVolatile() || 
Ordering == AtomicOrdering::Monotonic2.90k
;
874
2.91k
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875
2.91k
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED2.84k
&&
876
2.91k
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC2.78k
)
877
2.61k
    isVolatile = false;
878
2.91k
879
2.91k
  // Type Setting: fromType + fromTypeWidth
880
2.91k
  //
881
2.91k
  // Sign   : ISD::SEXTLOAD
882
2.91k
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
883
2.91k
  //          type is integer
884
2.91k
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
885
2.91k
  MVT SimpleVT = LoadedVT.getSimpleVT();
886
2.91k
  MVT ScalarVT = SimpleVT.getScalarType();
887
2.91k
  // Read at least 8 bits (predicates are stored as 8-bit values)
888
2.91k
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
889
2.91k
  unsigned int fromType;
890
2.91k
891
2.91k
  // Vector Setting
892
2.91k
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
893
2.91k
  if (SimpleVT.isVector()) {
894
293
    assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
895
293
    // v2f16 is loaded using ld.b32
896
293
    fromTypeWidth = 32;
897
293
  }
898
2.91k
899
2.91k
  if (PlainLoad && 
(PlainLoad->getExtensionType() == ISD::SEXTLOAD)2.91k
)
900
20
    fromType = NVPTX::PTXLdStInstCode::Signed;
901
2.89k
  else if (ScalarVT.isFloatingPoint())
902
1.18k
    // f16 uses .b16 as its storage type.
903
1.18k
    fromType = ScalarVT.SimpleTy == MVT::f16 ? 
NVPTX::PTXLdStInstCode::Untyped713
904
1.18k
                                             : 
NVPTX::PTXLdStInstCode::Float467
;
905
1.71k
  else
906
1.71k
    fromType = NVPTX::PTXLdStInstCode::Unsigned;
907
2.91k
908
2.91k
  // Create the machine instruction DAG
909
2.91k
  SDValue Chain = N->getOperand(0);
910
2.91k
  SDValue N1 = N->getOperand(1);
911
2.91k
  SDValue Addr;
912
2.91k
  SDValue Offset, Base;
913
2.91k
  Optional<unsigned> Opcode;
914
2.91k
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
915
2.91k
916
2.91k
  if (SelectDirectAddr(N1, Addr)) {
917
2.54k
    Opcode = pickOpcodeForVT(
918
2.54k
        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919
2.54k
        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920
2.54k
        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
921
2.54k
    if (!Opcode)
922
0
      return false;
923
2.54k
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
924
2.54k
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925
2.54k
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
926
2.54k
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
927
2.54k
                                     MVT::Other, Ops);
928
2.54k
  } else 
if (370
PointerSize == 64 370
?
SelectADDRsi64(N1.getNode(), N1, Base, Offset)256
929
370
                               : 
SelectADDRsi(N1.getNode(), N1, Base, Offset)114
) {
930
60
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
931
60
                                 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
932
60
                                 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
933
60
                                 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
934
60
    if (!Opcode)
935
0
      return false;
936
60
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
937
60
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
938
60
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
939
60
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
940
60
                                     MVT::Other, Ops);
941
310
  } else if (PointerSize == 64 ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset)201
942
310
                               : 
SelectADDRri(N1.getNode(), N1, Base, Offset)109
) {
943
107
    if (PointerSize == 64)
944
73
      Opcode = pickOpcodeForVT(
945
73
          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
946
73
          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
947
73
          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
948
34
    else
949
34
      Opcode = pickOpcodeForVT(
950
34
          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
951
34
          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
952
34
          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
953
107
    if (!Opcode)
954
0
      return false;
955
107
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
956
107
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
957
107
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
958
107
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
959
107
                                     MVT::Other, Ops);
960
203
  } else {
961
203
    if (PointerSize == 64)
962
128
      Opcode = pickOpcodeForVT(
963
128
          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
964
128
          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
965
128
          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
966
128
          NVPTX::LD_f64_areg_64);
967
75
    else
968
75
      Opcode = pickOpcodeForVT(
969
75
          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
970
75
          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
971
75
          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
972
203
    if (!Opcode)
973
0
      return false;
974
203
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
975
203
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
976
203
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
977
203
    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
978
203
                                     MVT::Other, Ops);
979
203
  }
980
2.91k
981
2.91k
  if (!NVPTXLD)
982
0
    return false;
983
2.91k
984
2.91k
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
985
2.91k
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
986
2.91k
987
2.91k
  ReplaceNode(N, NVPTXLD);
988
2.91k
  return true;
989
2.91k
}
990
991
185
bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
992
185
993
185
  SDValue Chain = N->getOperand(0);
994
185
  SDValue Op1 = N->getOperand(1);
995
185
  SDValue Addr, Offset, Base;
996
185
  Optional<unsigned> Opcode;
997
185
  SDLoc DL(N);
998
185
  SDNode *LD;
999
185
  MemSDNode *MemSD = cast<MemSDNode>(N);
1000
185
  EVT LoadedVT = MemSD->getMemoryVT();
1001
185
1002
185
  if (!LoadedVT.isSimple())
1003
0
    return false;
1004
185
1005
185
  // Address Space Setting
1006
185
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1007
185
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1008
20
    return tryLDGLDU(N);
1009
20
  }
1010
165
1011
165
  unsigned int PointerSize =
1012
165
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1013
165
1014
165
  // Volatile Setting
1015
165
  // - .volatile is only availalble for .global and .shared
1016
165
  bool IsVolatile = MemSD->isVolatile();
1017
165
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1018
165
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED145
&&
1019
165
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC145
)
1020
116
    IsVolatile = false;
1021
165
1022
165
  // Vector Setting
1023
165
  MVT SimpleVT = LoadedVT.getSimpleVT();
1024
165
1025
165
  // Type Setting: fromType + fromTypeWidth
1026
165
  //
1027
165
  // Sign   : ISD::SEXTLOAD
1028
165
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1029
165
  //          type is integer
1030
165
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1031
165
  MVT ScalarVT = SimpleVT.getScalarType();
1032
165
  // Read at least 8 bits (predicates are stored as 8-bit values)
1033
165
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
1034
165
  unsigned int FromType;
1035
165
  // The last operand holds the original LoadSDNode::getExtensionType() value
1036
165
  unsigned ExtensionType = cast<ConstantSDNode>(
1037
165
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1038
165
  if (ExtensionType == ISD::SEXTLOAD)
1039
0
    FromType = NVPTX::PTXLdStInstCode::Signed;
1040
165
  else if (ScalarVT.isFloatingPoint())
1041
70
    FromType = ScalarVT.SimpleTy == MVT::f16 ? 
NVPTX::PTXLdStInstCode::Untyped10
1042
70
                                             : 
NVPTX::PTXLdStInstCode::Float60
;
1043
95
  else
1044
95
    FromType = NVPTX::PTXLdStInstCode::Unsigned;
1045
165
1046
165
  unsigned VecType;
1047
165
1048
165
  switch (N->getOpcode()) {
1049
165
  case NVPTXISD::LoadV2:
1050
108
    VecType = NVPTX::PTXLdStInstCode::V2;
1051
108
    break;
1052
165
  case NVPTXISD::LoadV4:
1053
57
    VecType = NVPTX::PTXLdStInstCode::V4;
1054
57
    break;
1055
165
  default:
1056
0
    return false;
1057
165
  }
1058
165
1059
165
  EVT EltVT = N->getValueType(0);
1060
165
1061
165
  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1062
165
  // instruction. Instead, we split the vector into v2f16 chunks and
1063
165
  // load them with ld.v4.b32.
1064
165
  if (EltVT == MVT::v2f16) {
1065
3
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1066
3
    EltVT = MVT::i32;
1067
3
    FromType = NVPTX::PTXLdStInstCode::Untyped;
1068
3
    FromTypeWidth = 32;
1069
3
  }
1070
165
1071
165
  if (SelectDirectAddr(Op1, Addr)) {
1072
99
    switch (N->getOpcode()) {
1073
99
    default:
1074
0
      return false;
1075
99
    case NVPTXISD::LoadV2:
1076
74
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1077
74
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1078
74
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1079
74
                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1080
74
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1081
74
      break;
1082
99
    case NVPTXISD::LoadV4:
1083
25
      Opcode =
1084
25
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1085
25
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1086
25
                          NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1087
25
                          NVPTX::LDV_f32_v4_avar, None);
1088
25
      break;
1089
99
    }
1090
99
    if (!Opcode)
1091
0
      return false;
1092
99
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1093
99
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1094
99
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
1095
99
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1096
99
  } else 
if (66
PointerSize == 64
1097
66
                 ? 
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)38
1098
66
                 : 
SelectADDRsi(Op1.getNode(), Op1, Base, Offset)28
) {
1099
17
    switch (N->getOpcode()) {
1100
17
    default:
1101
0
      return false;
1102
17
    case NVPTXISD::LoadV2:
1103
5
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1104
5
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1105
5
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1106
5
                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1107
5
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1108
5
      break;
1109
17
    case NVPTXISD::LoadV4:
1110
12
      Opcode =
1111
12
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1112
12
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1113
12
                          NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1114
12
                          NVPTX::LDV_f32_v4_asi, None);
1115
12
      break;
1116
17
    }
1117
17
    if (!Opcode)
1118
0
      return false;
1119
17
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1120
17
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1121
17
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1122
17
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1123
49
  } else if (PointerSize == 64
1124
49
                 ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset)32
1125
49
                 : 
SelectADDRri(Op1.getNode(), Op1, Base, Offset)17
) {
1126
8
    if (PointerSize == 64) {
1127
3
      switch (N->getOpcode()) {
1128
3
      default:
1129
0
        return false;
1130
3
      case NVPTXISD::LoadV2:
1131
3
        Opcode = pickOpcodeForVT(
1132
3
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1133
3
            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1134
3
            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1135
3
            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1136
3
            NVPTX::LDV_f64_v2_ari_64);
1137
3
        break;
1138
3
      case NVPTXISD::LoadV4:
1139
0
        Opcode = pickOpcodeForVT(
1140
0
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1141
0
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1142
0
            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1143
0
            NVPTX::LDV_f32_v4_ari_64, None);
1144
0
        break;
1145
5
      }
1146
5
    } else {
1147
5
      switch (N->getOpcode()) {
1148
5
      default:
1149
0
        return false;
1150
5
      case NVPTXISD::LoadV2:
1151
3
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1152
3
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1153
3
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1154
3
                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1155
3
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1156
3
        break;
1157
5
      case NVPTXISD::LoadV4:
1158
2
        Opcode =
1159
2
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1160
2
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1161
2
                            NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1162
2
                            NVPTX::LDV_f32_v4_ari, None);
1163
2
        break;
1164
8
      }
1165
8
    }
1166
8
    if (!Opcode)
1167
0
      return false;
1168
8
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1169
8
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1170
8
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1171
8
1172
8
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1173
41
  } else {
1174
41
    if (PointerSize == 64) {
1175
29
      switch (N->getOpcode()) {
1176
29
      default:
1177
0
        return false;
1178
29
      case NVPTXISD::LoadV2:
1179
15
        Opcode = pickOpcodeForVT(
1180
15
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1181
15
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1182
15
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1183
15
            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1184
15
            NVPTX::LDV_f64_v2_areg_64);
1185
15
        break;
1186
29
      case NVPTXISD::LoadV4:
1187
14
        Opcode = pickOpcodeForVT(
1188
14
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1189
14
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1190
14
            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1191
14
            NVPTX::LDV_f32_v4_areg_64, None);
1192
14
        break;
1193
12
      }
1194
12
    } else {
1195
12
      switch (N->getOpcode()) {
1196
12
      default:
1197
0
        return false;
1198
12
      case NVPTXISD::LoadV2:
1199
8
        Opcode =
1200
8
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1201
8
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1202
8
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1203
8
                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1204
8
                            NVPTX::LDV_f64_v2_areg);
1205
8
        break;
1206
12
      case NVPTXISD::LoadV4:
1207
4
        Opcode = pickOpcodeForVT(
1208
4
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1209
4
            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1210
4
            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1211
4
            NVPTX::LDV_f32_v4_areg, None);
1212
4
        break;
1213
41
      }
1214
41
    }
1215
41
    if (!Opcode)
1216
0
      return false;
1217
41
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1218
41
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1219
41
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
1220
41
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1221
41
  }
1222
165
1223
165
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1224
165
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1225
165
1226
165
  ReplaceNode(N, LD);
1227
165
  return true;
1228
165
}
1229
1230
42
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1231
42
1232
42
  SDValue Chain = N->getOperand(0);
1233
42
  SDValue Op1;
1234
42
  MemSDNode *Mem;
1235
42
  bool IsLDG = true;
1236
42
1237
42
  // If this is an LDG intrinsic, the address is the third operand. If its an
1238
42
  // LDG/LDU SD node (from custom vector handling), then its the second operand
1239
42
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1240
7
    Op1 = N->getOperand(2);
1241
7
    Mem = cast<MemIntrinsicSDNode>(N);
1242
7
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1243
7
    switch (IID) {
1244
7
    default:
1245
0
      return false;
1246
7
    case Intrinsic::nvvm_ldg_global_f:
1247
2
    case Intrinsic::nvvm_ldg_global_i:
1248
2
    case Intrinsic::nvvm_ldg_global_p:
1249
2
      IsLDG = true;
1250
2
      break;
1251
5
    case Intrinsic::nvvm_ldu_global_f:
1252
5
    case Intrinsic::nvvm_ldu_global_i:
1253
5
    case Intrinsic::nvvm_ldu_global_p:
1254
5
      IsLDG = false;
1255
5
      break;
1256
35
    }
1257
35
  } else {
1258
35
    Op1 = N->getOperand(1);
1259
35
    Mem = cast<MemSDNode>(N);
1260
35
  }
1261
42
1262
42
  Optional<unsigned> Opcode;
1263
42
  SDLoc DL(N);
1264
42
  SDNode *LD;
1265
42
  SDValue Base, Offset, Addr;
1266
42
1267
42
  EVT EltVT = Mem->getMemoryVT();
1268
42
  unsigned NumElts = 1;
1269
42
  if (EltVT.isVector()) {
1270
21
    NumElts = EltVT.getVectorNumElements();
1271
21
    EltVT = EltVT.getVectorElementType();
1272
21
    // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1273
21
    if (EltVT == MVT::f16 && 
N->getValueType(0) == MVT::v2f163
) {
1274
2
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1275
2
      EltVT = MVT::v2f16;
1276
2
      NumElts /= 2;
1277
2
    }
1278
21
  }
1279
42
1280
42
  // Build the "promoted" result VTList for the load. If we are really loading
1281
42
  // i8s, then the return type will be promoted to i16 since we do not expose
1282
42
  // 8-bit registers in NVPTX.
1283
42
  EVT NodeVT = (EltVT == MVT::i8) ? 
MVT::i169
:
EltVT33
;
1284
42
  SmallVector<EVT, 5> InstVTs;
1285
120
  for (unsigned i = 0; i != NumElts; 
++i78
) {
1286
78
    InstVTs.push_back(NodeVT);
1287
78
  }
1288
42
  InstVTs.push_back(MVT::Other);
1289
42
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1290
42
1291
42
  if (SelectDirectAddr(Op1, Addr)) {
1292
3
    switch (N->getOpcode()) {
1293
3
    default:
1294
0
      return false;
1295
3
    case ISD::LOAD:
1296
1
    case ISD::INTRINSIC_W_CHAIN:
1297
1
      if (IsLDG)
1298
1
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1299
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1300
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1301
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1302
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1303
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1304
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1305
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1306
1
                                     NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1307
0
      else
1308
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1309
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1310
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1311
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1312
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1313
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1314
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1315
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1316
0
                                     NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1317
1
      break;
1318
1
    case NVPTXISD::LoadV2:
1319
1
    case NVPTXISD::LDGV2:
1320
1
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1321
1
                                   NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1322
1
                                   NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1323
1
                                   NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1324
1
                                   NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1325
1
                                   NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1326
1
                                   NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1327
1
                                   NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1328
1
                                   NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1329
1
      break;
1330
1
    case NVPTXISD::LDUV2:
1331
0
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1332
0
                                   NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1333
0
                                   NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1334
0
                                   NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1335
0
                                   NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1336
0
                                   NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1337
0
                                   NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1338
0
                                   NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1339
0
                                   NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1340
0
      break;
1341
1
    case NVPTXISD::LoadV4:
1342
1
    case NVPTXISD::LDGV4:
1343
1
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1344
1
                               NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1345
1
                               NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1346
1
                               NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1347
1
                               NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1348
1
                               NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1349
1
                               NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1350
1
      break;
1351
1
    case NVPTXISD::LDUV4:
1352
0
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1353
0
                               NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1354
0
                               NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1355
0
                               NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1356
0
                               NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1357
0
                               NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1358
0
                               NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1359
0
      break;
1360
3
    }
1361
3
    if (!Opcode)
1362
0
      return false;
1363
3
    SDValue Ops[] = { Addr, Chain };
1364
3
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1365
39
  } else if (TM.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset)27
1366
39
                          : 
SelectADDRri(Op1.getNode(), Op1, Base, Offset)12
) {
1367
5
    if (TM.is64Bit()) {
1368
2
      switch (N->getOpcode()) {
1369
2
      default:
1370
0
        return false;
1371
2
      case ISD::LOAD:
1372
1
      case ISD::INTRINSIC_W_CHAIN:
1373
1
        if (IsLDG)
1374
1
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1375
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1376
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1377
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1378
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1379
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1380
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1381
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1382
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1383
0
        else
1384
0
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1385
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1386
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1387
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1388
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1389
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1390
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1391
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1392
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1393
1
        break;
1394
1
      case NVPTXISD::LoadV2:
1395
1
      case NVPTXISD::LDGV2:
1396
1
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1397
1
                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1398
1
                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1399
1
                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1400
1
                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1401
1
                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1402
1
                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1403
1
                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1404
1
                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1405
1
        break;
1406
1
      case NVPTXISD::LDUV2:
1407
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1408
0
                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1409
0
                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1410
0
                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1411
0
                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1412
0
                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1413
0
                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1414
0
                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1415
0
                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1416
0
        break;
1417
1
      case NVPTXISD::LoadV4:
1418
0
      case NVPTXISD::LDGV4:
1419
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1420
0
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1421
0
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1422
0
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1423
0
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1424
0
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1425
0
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1426
0
        break;
1427
0
      case NVPTXISD::LDUV4:
1428
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1429
0
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1430
0
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1431
0
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1432
0
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1433
0
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1434
0
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1435
0
        break;
1436
3
      }
1437
3
    } else {
1438
3
      switch (N->getOpcode()) {
1439
3
      default:
1440
0
        return false;
1441
3
      case ISD::LOAD:
1442
3
      case ISD::INTRINSIC_W_CHAIN:
1443
3
        if (IsLDG)
1444
1
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1445
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1446
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1447
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1448
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1449
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1450
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1451
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452
1
                                       NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1453
2
        else
1454
2
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1455
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1460
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1461
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1462
2
                                       NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1463
3
        break;
1464
3
      case NVPTXISD::LoadV2:
1465
0
      case NVPTXISD::LDGV2:
1466
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1467
0
                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1468
0
                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1469
0
                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1470
0
                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1471
0
                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1472
0
                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1473
0
                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1474
0
                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1475
0
        break;
1476
0
      case NVPTXISD::LDUV2:
1477
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1478
0
                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1479
0
                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1480
0
                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1481
0
                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1482
0
                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1483
0
                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1484
0
                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1485
0
                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1486
0
        break;
1487
0
      case NVPTXISD::LoadV4:
1488
0
      case NVPTXISD::LDGV4:
1489
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1490
0
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1491
0
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1492
0
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1493
0
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1494
0
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1495
0
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1496
0
        break;
1497
0
      case NVPTXISD::LDUV4:
1498
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1499
0
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1500
0
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1501
0
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1502
0
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1503
0
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1504
0
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1505
0
        break;
1506
5
      }
1507
5
    }
1508
5
    if (!Opcode)
1509
0
      return false;
1510
5
    SDValue Ops[] = {Base, Offset, Chain};
1511
5
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1512
34
  } else {
1513
34
    if (TM.is64Bit()) {
1514
25
      switch (N->getOpcode()) {
1515
25
      default:
1516
0
        return false;
1517
25
      case ISD::LOAD:
1518
10
      case ISD::INTRINSIC_W_CHAIN:
1519
10
        if (IsLDG)
1520
10
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1521
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1522
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1523
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1524
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1525
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1526
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1527
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1528
10
                                       NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1529
0
        else
1530
0
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1531
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1532
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1533
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1534
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1535
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1536
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1537
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1538
0
                                       NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1539
10
        break;
1540
10
      case NVPTXISD::LoadV2:
1541
8
      case NVPTXISD::LDGV2:
1542
8
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1543
8
                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1544
8
                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1545
8
                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1546
8
                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1547
8
                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1548
8
                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1549
8
                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1550
8
                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1551
8
        break;
1552
8
      case NVPTXISD::LDUV2:
1553
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1554
0
                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1555
0
                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1556
0
                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1557
0
                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1558
0
                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1559
0
                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1560
0
                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1561
0
                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1562
0
        break;
1563
8
      case NVPTXISD::LoadV4:
1564
7
      case NVPTXISD::LDGV4:
1565
7
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1566
7
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1567
7
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1568
7
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1569
7
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1570
7
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1571
7
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1572
7
        break;
1573
7
      case NVPTXISD::LDUV4:
1574
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1575
0
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1576
0
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1577
0
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1578
0
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1579
0
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1580
0
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1581
0
        break;
1582
9
      }
1583
9
    } else {
1584
9
      switch (N->getOpcode()) {
1585
9
      default:
1586
0
        return false;
1587
9
      case ISD::LOAD:
1588
7
      case ISD::INTRINSIC_W_CHAIN:
1589
7
        if (IsLDG)
1590
4
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1591
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1592
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1593
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1594
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1595
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1596
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1597
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1598
4
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1599
3
        else
1600
3
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1601
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1602
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1603
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1604
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1605
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1606
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1607
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1608
3
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1609
7
        break;
1610
7
      case NVPTXISD::LoadV2:
1611
2
      case NVPTXISD::LDGV2:
1612
2
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1613
2
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1614
2
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1615
2
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1616
2
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1617
2
                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1618
2
                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1619
2
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1620
2
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1621
2
        break;
1622
2
      case NVPTXISD::LDUV2:
1623
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1624
0
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1625
0
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1626
0
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1627
0
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1628
0
                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1629
0
                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1630
0
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1631
0
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1632
0
        break;
1633
2
      case NVPTXISD::LoadV4:
1634
0
      case NVPTXISD::LDGV4:
1635
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1636
0
                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1637
0
                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1638
0
                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1639
0
                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1640
0
                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1641
0
                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1642
0
        break;
1643
0
      case NVPTXISD::LDUV4:
1644
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1645
0
                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1646
0
                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1647
0
                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1648
0
                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1649
0
                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1650
0
                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1651
0
        break;
1652
34
      }
1653
34
    }
1654
34
    if (!Opcode)
1655
0
      return false;
1656
34
    SDValue Ops[] = { Op1, Chain };
1657
34
    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1658
34
  }
1659
42
1660
42
  MachineMemOperand *MemRef = Mem->getMemOperand();
1661
42
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1662
42
1663
42
  // For automatic generation of LDG (through SelectLoad[Vector], not the
1664
42
  // intrinsics), we may have an extending load like:
1665
42
  //
1666
42
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1667
42
  //
1668
42
  // In this case, the matching logic above will select a load for the original
1669
42
  // memory type (in this case, i8) and our types will not match (the node needs
1670
42
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1671
42
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1672
42
  // CVT instruction. Ptxas should clean up any redundancies here.
1673
42
1674
42
  EVT OrigType = N->getValueType(0);
1675
42
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1676
42
1677
42
  if (OrigType != EltVT && 
LdNode10
) {
1678
3
    // We have an extending-load. The instruction we selected operates on the
1679
3
    // smaller type, but the SDNode we are replacing has the larger type. We
1680
3
    // need to emit a CVT to make the types match.
1681
3
    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1682
3
    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1683
3
                                       EltVT.getSimpleVT(), IsSigned);
1684
3
1685
3
    // For each output value, apply the manual sign/zero-extension and make sure
1686
3
    // all users of the load go through that CVT.
1687
6
    for (unsigned i = 0; i != NumElts; 
++i3
) {
1688
3
      SDValue Res(LD, i);
1689
3
      SDValue OrigVal(N, i);
1690
3
1691
3
      SDNode *CvtNode =
1692
3
        CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1693
3
                               CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1694
3
                                                         DL, MVT::i32));
1695
3
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1696
3
    }
1697
3
  }
1698
42
1699
42
  ReplaceNode(N, LD);
1700
42
  return true;
1701
42
}
1702
1703
403
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1704
403
  SDLoc dl(N);
1705
403
  MemSDNode *ST = cast<MemSDNode>(N);
1706
403
  assert(ST->writeMem() && "Expected store");
1707
403
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1708
403
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1709
403
  assert((PlainStore || AtomicStore) && "Expected store");
1710
403
  EVT StoreVT = ST->getMemoryVT();
1711
403
  SDNode *NVPTXST = nullptr;
1712
403
1713
403
  // do not support pre/post inc/dec
1714
403
  if (PlainStore && 
PlainStore->isIndexed()398
)
1715
0
    return false;
1716
403
1717
403
  if (!StoreVT.isSimple())
1718
0
    return false;
1719
403
1720
403
  AtomicOrdering Ordering = ST->getOrdering();
1721
403
  // In order to lower atomic stores with stronger guarantees we would need to
1722
403
  // use store.release or insert fences. However these features were only added
1723
403
  // with PTX ISA 6.0 / sm_70.
1724
403
  // TODO: Check if we can actually use the new instructions and implement them.
1725
403
  if (isStrongerThanMonotonic(Ordering))
1726
0
    return false;
1727
403
1728
403
  // Address Space Setting
1729
403
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1730
403
  unsigned int PointerSize =
1731
403
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
1732
403
1733
403
  // Volatile Setting
1734
403
  // - .volatile is only available for .global and .shared
1735
403
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
1736
403
  bool isVolatile = ST->isVolatile() || 
Ordering == AtomicOrdering::Monotonic395
;
1737
403
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1738
403
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED321
&&
1739
403
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC289
)
1740
33
    isVolatile = false;
1741
403
1742
403
  // Vector Setting
1743
403
  MVT SimpleVT = StoreVT.getSimpleVT();
1744
403
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1745
403
1746
403
  // Type Setting: toType + toTypeWidth
1747
403
  // - for integer type, always use 'u'
1748
403
  //
1749
403
  MVT ScalarVT = SimpleVT.getScalarType();
1750
403
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1751
403
  if (SimpleVT.isVector()) {
1752
0
    assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1753
0
    // v2f16 is stored using st.b32
1754
0
    toTypeWidth = 32;
1755
0
  }
1756
403
1757
403
  unsigned int toType;
1758
403
  if (ScalarVT.isFloatingPoint())
1759
113
    // f16 uses .b16 as its storage type.
1760
113
    toType = ScalarVT.SimpleTy == MVT::f16 ? 
NVPTX::PTXLdStInstCode::Untyped13
1761
113
                                           : 
NVPTX::PTXLdStInstCode::Float100
;
1762
290
  else
1763
290
    toType = NVPTX::PTXLdStInstCode::Unsigned;
1764
403
1765
403
  // Create the machine instruction DAG
1766
403
  SDValue Chain = ST->getChain();
1767
403
  SDValue Value = PlainStore ? 
PlainStore->getValue()398
:
AtomicStore->getVal()5
;
1768
403
  SDValue BasePtr = ST->getBasePtr();
1769
403
  SDValue Addr;
1770
403
  SDValue Offset, Base;
1771
403
  Optional<unsigned> Opcode;
1772
403
  MVT::SimpleValueType SourceVT =
1773
403
      Value.getNode()->getSimpleValueType(0).SimpleTy;
1774
403
1775
403
  if (SelectDirectAddr(BasePtr, Addr)) {
1776
6
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1777
6
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1778
6
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1779
6
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1780
6
    if (!Opcode)
1781
0
      return false;
1782
6
    SDValue Ops[] = {Value,
1783
6
                     getI32Imm(isVolatile, dl),
1784
6
                     getI32Imm(CodeAddrSpace, dl),
1785
6
                     getI32Imm(vecType, dl),
1786
6
                     getI32Imm(toType, dl),
1787
6
                     getI32Imm(toTypeWidth, dl),
1788
6
                     Addr,
1789
6
                     Chain};
1790
6
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1791
397
  } else if (PointerSize == 64
1792
397
                 ? 
SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)207
1793
397
                 : 
SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)190
) {
1794
7
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1795
7
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1796
7
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1797
7
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1798
7
    if (!Opcode)
1799
0
      return false;
1800
7
    SDValue Ops[] = {Value,
1801
7
                     getI32Imm(isVolatile, dl),
1802
7
                     getI32Imm(CodeAddrSpace, dl),
1803
7
                     getI32Imm(vecType, dl),
1804
7
                     getI32Imm(toType, dl),
1805
7
                     getI32Imm(toTypeWidth, dl),
1806
7
                     Base,
1807
7
                     Offset,
1808
7
                     Chain};
1809
7
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1810
390
  } else if (PointerSize == 64
1811
390
                 ? 
SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)204
1812
390
                 : 
SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)186
) {
1813
112
    if (PointerSize == 64)
1814
68
      Opcode = pickOpcodeForVT(
1815
68
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1816
68
          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1817
68
          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1818
44
    else
1819
44
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1820
44
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1821
44
                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1822
44
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1823
112
    if (!Opcode)
1824
0
      return false;
1825
112
1826
112
    SDValue Ops[] = {Value,
1827
112
                     getI32Imm(isVolatile, dl),
1828
112
                     getI32Imm(CodeAddrSpace, dl),
1829
112
                     getI32Imm(vecType, dl),
1830
112
                     getI32Imm(toType, dl),
1831
112
                     getI32Imm(toTypeWidth, dl),
1832
112
                     Base,
1833
112
                     Offset,
1834
112
                     Chain};
1835
112
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1836
278
  } else {
1837
278
    if (PointerSize == 64)
1838
136
      Opcode =
1839
136
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1840
136
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1841
136
                          NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1842
136
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1843
142
    else
1844
142
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1845
142
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1846
142
                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1847
142
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1848
278
    if (!Opcode)
1849
0
      return false;
1850
278
    SDValue Ops[] = {Value,
1851
278
                     getI32Imm(isVolatile, dl),
1852
278
                     getI32Imm(CodeAddrSpace, dl),
1853
278
                     getI32Imm(vecType, dl),
1854
278
                     getI32Imm(toType, dl),
1855
278
                     getI32Imm(toTypeWidth, dl),
1856
278
                     BasePtr,
1857
278
                     Chain};
1858
278
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1859
278
  }
1860
403
1861
403
  if (!NVPTXST)
1862
0
    return false;
1863
403
1864
403
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1865
403
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
1866
403
  ReplaceNode(N, NVPTXST);
1867
403
  return true;
1868
403
}
1869
1870
59
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1871
59
  SDValue Chain = N->getOperand(0);
1872
59
  SDValue Op1 = N->getOperand(1);
1873
59
  SDValue Addr, Offset, Base;
1874
59
  Optional<unsigned> Opcode;
1875
59
  SDLoc DL(N);
1876
59
  SDNode *ST;
1877
59
  EVT EltVT = Op1.getValueType();
1878
59
  MemSDNode *MemSD = cast<MemSDNode>(N);
1879
59
  EVT StoreVT = MemSD->getMemoryVT();
1880
59
1881
59
  // Address Space Setting
1882
59
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1883
59
  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1884
0
    report_fatal_error("Cannot store to pointer that points to constant "
1885
0
                       "memory space");
1886
0
  }
1887
59
  unsigned int PointerSize =
1888
59
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1889
59
1890
59
  // Volatile Setting
1891
59
  // - .volatile is only available for .global and .shared
1892
59
  bool IsVolatile = MemSD->isVolatile();
1893
59
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1894
59
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED29
&&
1895
59
      
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC29
)
1896
0
    IsVolatile = false;
1897
59
1898
59
  // Type Setting: toType + toTypeWidth
1899
59
  // - for integer type, always use 'u'
1900
59
  assert(StoreVT.isSimple() && "Store value is not simple");
1901
59
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1902
59
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1903
59
  unsigned ToType;
1904
59
  if (ScalarVT.isFloatingPoint())
1905
34
    ToType = ScalarVT.SimpleTy == MVT::f16 ? 
NVPTX::PTXLdStInstCode::Untyped9
1906
34
                                           : 
NVPTX::PTXLdStInstCode::Float25
;
1907
25
  else
1908
25
    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1909
59
1910
59
  SmallVector<SDValue, 12> StOps;
1911
59
  SDValue N2;
1912
59
  unsigned VecType;
1913
59
1914
59
  switch (N->getOpcode()) {
1915
59
  case NVPTXISD::StoreV2:
1916
35
    VecType = NVPTX::PTXLdStInstCode::V2;
1917
35
    StOps.push_back(N->getOperand(1));
1918
35
    StOps.push_back(N->getOperand(2));
1919
35
    N2 = N->getOperand(3);
1920
35
    break;
1921
59
  case NVPTXISD::StoreV4:
1922
24
    VecType = NVPTX::PTXLdStInstCode::V4;
1923
24
    StOps.push_back(N->getOperand(1));
1924
24
    StOps.push_back(N->getOperand(2));
1925
24
    StOps.push_back(N->getOperand(3));
1926
24
    StOps.push_back(N->getOperand(4));
1927
24
    N2 = N->getOperand(5);
1928
24
    break;
1929
59
  default:
1930
0
    return false;
1931
59
  }
1932
59
1933
59
  // v8f16 is a special case. PTX doesn't have st.v8.f16
1934
59
  // instruction. Instead, we split the vector into v2f16 chunks and
1935
59
  // store them with st.v4.b32.
1936
59
  if (EltVT == MVT::v2f16) {
1937
3
    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1938
3
    EltVT = MVT::i32;
1939
3
    ToType = NVPTX::PTXLdStInstCode::Untyped;
1940
3
    ToTypeWidth = 32;
1941
3
  }
1942
59
1943
59
  StOps.push_back(getI32Imm(IsVolatile, DL));
1944
59
  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1945
59
  StOps.push_back(getI32Imm(VecType, DL));
1946
59
  StOps.push_back(getI32Imm(ToType, DL));
1947
59
  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1948
59
1949
59
  if (SelectDirectAddr(N2, Addr)) {
1950
0
    switch (N->getOpcode()) {
1951
0
    default:
1952
0
      return false;
1953
0
    case NVPTXISD::StoreV2:
1954
0
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1955
0
                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1956
0
                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1957
0
                               NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1958
0
                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1959
0
      break;
1960
0
    case NVPTXISD::StoreV4:
1961
0
      Opcode =
1962
0
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1963
0
                          NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1964
0
                          NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1965
0
                          NVPTX::STV_f32_v4_avar, None);
1966
0
      break;
1967
0
    }
1968
0
    StOps.push_back(Addr);
1969
59
  } else if (PointerSize == 64 ? 
SelectADDRsi64(N2.getNode(), N2, Base, Offset)42
1970
59
                               : 
SelectADDRsi(N2.getNode(), N2, Base, Offset)17
) {
1971
0
    switch (N->getOpcode()) {
1972
0
    default:
1973
0
      return false;
1974
0
    case NVPTXISD::StoreV2:
1975
0
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1976
0
                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1977
0
                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1978
0
                               NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1979
0
                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1980
0
      break;
1981
0
    case NVPTXISD::StoreV4:
1982
0
      Opcode =
1983
0
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1984
0
                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1985
0
                          NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1986
0
                          NVPTX::STV_f32_v4_asi, None);
1987
0
      break;
1988
0
    }
1989
0
    StOps.push_back(Base);
1990
0
    StOps.push_back(Offset);
1991
59
  } else if (PointerSize == 64 ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset)42
1992
59
                               : 
SelectADDRri(N2.getNode(), N2, Base, Offset)17
) {
1993
7
    if (PointerSize == 64) {
1994
5
      switch (N->getOpcode()) {
1995
5
      default:
1996
0
        return false;
1997
5
      case NVPTXISD::StoreV2:
1998
5
        Opcode = pickOpcodeForVT(
1999
5
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
2000
5
            NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2001
5
            NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2002
5
            NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2003
5
            NVPTX::STV_f64_v2_ari_64);
2004
5
        break;
2005
5
      case NVPTXISD::StoreV4:
2006
0
        Opcode = pickOpcodeForVT(
2007
0
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2008
0
            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2009
0
            NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2010
0
            NVPTX::STV_f32_v4_ari_64, None);
2011
0
        break;
2012
2
      }
2013
2
    } else {
2014
2
      switch (N->getOpcode()) {
2015
2
      default:
2016
0
        return false;
2017
2
      case NVPTXISD::StoreV2:
2018
0
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2019
0
                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2020
0
                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2021
0
                                 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2022
0
                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2023
0
        break;
2024
2
      case NVPTXISD::StoreV4:
2025
2
        Opcode =
2026
2
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2027
2
                            NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2028
2
                            NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2029
2
                            NVPTX::STV_f32_v4_ari, None);
2030
2
        break;
2031
7
      }
2032
7
    }
2033
7
    StOps.push_back(Base);
2034
7
    StOps.push_back(Offset);
2035
52
  } else {
2036
52
    if (PointerSize == 64) {
2037
37
      switch (N->getOpcode()) {
2038
37
      default:
2039
0
        return false;
2040
37
      case NVPTXISD::StoreV2:
2041
22
        Opcode = pickOpcodeForVT(
2042
22
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2043
22
            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2044
22
            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2045
22
            NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2046
22
            NVPTX::STV_f64_v2_areg_64);
2047
22
        break;
2048
37
      case NVPTXISD::StoreV4:
2049
15
        Opcode = pickOpcodeForVT(
2050
15
            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2051
15
            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2052
15
            NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2053
15
            NVPTX::STV_f32_v4_areg_64, None);
2054
15
        break;
2055
15
      }
2056
15
    } else {
2057
15
      switch (N->getOpcode()) {
2058
15
      default:
2059
0
        return false;
2060
15
      case NVPTXISD::StoreV2:
2061
8
        Opcode =
2062
8
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2063
8
                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2064
8
                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2065
8
                            NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2066
8
                            NVPTX::STV_f64_v2_areg);
2067
8
        break;
2068
15
      case NVPTXISD::StoreV4:
2069
7
        Opcode =
2070
7
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2071
7
                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2072
7
                            NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2073
7
                            NVPTX::STV_f32_v4_areg, None);
2074
7
        break;
2075
52
      }
2076
52
    }
2077
52
    StOps.push_back(N2);
2078
52
  }
2079
59
2080
59
  if (!Opcode)
2081
0
    return false;
2082
59
2083
59
  StOps.push_back(Chain);
2084
59
2085
59
  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2086
59
2087
59
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2088
59
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2089
59
2090
59
  ReplaceNode(N, ST);
2091
59
  return true;
2092
59
}
2093
2094
158
/// Select a machine LoadParamMem* instruction for an
/// NVPTXISD::LoadParam / LoadParamV2 / LoadParamV4 node.
///
/// Operand 0 of \p Node is used as the chain, operand 2 must be a
/// ConstantSDNode holding the offset, and operand 3 is forwarded unchanged as
/// the trailing operand of the machine node. The machine opcode is picked from
/// the node's memory VT; the result list is one, two or four values of the
/// node's first result type followed by MVT::Other and MVT::Glue.
///
/// \returns true if \p Node was replaced with a machine node; false if its
/// opcode is not one of the LoadParam variants or if pickOpcodeForVT yields no
/// opcode for the memory type.
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Flag = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  // Number of values produced by the load.
  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return false;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  Optional<unsigned> Opcode;

  switch (VecSize) {
  default:
    return false;
  case 1:
    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
                             NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
    break;
  case 2:
    Opcode =
        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
                        NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
                        NVPTX::LoadParamMemV2F64);
    break;
  case 4:
    // No i64 / f64 opcode is supplied for the four-element form, so those
    // slots are None.
    Opcode = pickOpcodeForVT(
        MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
        NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
        NVPTX::LoadParamMemV4F32, None);
    break;
  }
  if (!Opcode)
    return false;

  // Result types: VecSize copies of EltVT, then chain and glue.
  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();

  // Exactly three operands are always passed, so a fixed-size array is used
  // instead of a SmallVector whose inline capacity (2) was one element short
  // and therefore spilled to the heap on every call.
  SDValue Ops[] = {CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain,
                   Flag};

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
  return true;
}
2170
2171
1.37k
/// Select a machine StoreRetval* instruction for an
/// NVPTXISD::StoreRetval / StoreRetvalV2 / StoreRetvalV4 node.
///
/// Operand 0 of \p N is the chain and operand 1 must be a ConstantSDNode
/// holding the offset; the one, two or four values to store follow from
/// operand 2 onwards. The new machine node inherits the memory operand of
/// \p N.
///
/// \returns true if \p N was replaced; false for any other opcode or when
/// pickOpcodeForVT has no opcode for the memory type.
bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // The node opcode fixes how many values are being stored.
  unsigned NumElts;
  switch (N->getOpcode()) {
  case NVPTXISD::StoreRetval:
    NumElts = 1;
    break;
  case NVPTXISD::StoreRetvalV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreRetvalV4:
    NumElts = 4;
    break;
  default:
    return false;
  }

  // Machine operands: the stored values, then the offset, then the chain.
  SmallVector<SDValue, 6> Ops;
  for (unsigned Idx = 2, End = NumElts + 2; Idx != End; ++Idx)
    Ops.push_back(N->getOperand(Idx));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);

  // Pick the target opcode from the memory type. (Per the original note: an
  // i1 uses an 8-bit store, the lowering code in NVPTXISelLowering having
  // already emitted an upcast.)
  const MVT::SimpleValueType MemTy =
      Mem->getMemoryVT().getSimpleVT().SimpleTy;
  Optional<unsigned> Opcode;
  if (NumElts == 1) {
    Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreRetvalI8,
                             NVPTX::StoreRetvalI16, NVPTX::StoreRetvalI32,
                             NVPTX::StoreRetvalI64, NVPTX::StoreRetvalF16,
                             NVPTX::StoreRetvalF16x2, NVPTX::StoreRetvalF32,
                             NVPTX::StoreRetvalF64);
  } else if (NumElts == 2) {
    Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreRetvalV2I8,
                             NVPTX::StoreRetvalV2I16, NVPTX::StoreRetvalV2I32,
                             NVPTX::StoreRetvalV2I64, NVPTX::StoreRetvalV2F16,
                             NVPTX::StoreRetvalV2F16x2,
                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
  } else {
    // NumElts == 4: the i64 and f64 slots are None.
    Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreRetvalV4I8,
                             NVPTX::StoreRetvalV4I16, NVPTX::StoreRetvalV4I32,
                             None, NVPTX::StoreRetvalV4F16,
                             NVPTX::StoreRetvalV4F16x2,
                             NVPTX::StoreRetvalV4F32, None);
  }
  if (!Opcode)
    return false;

  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
  // Carry the original node's memory operand over to the machine node.
  MachineMemOperand *MemRef = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}
2240
2241
209
/// Select a machine StoreParam* instruction for an NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// Operand 0 of \p N is the chain, operands 1 and 2 must be ConstantSDNodes
/// (parameter number and offset), the stored values start at operand 3, and
/// the last operand is forwarded as the trailing operand of the machine node.
/// For the U32 / S32 variants a CVT_u32_u16 / CVT_s32_s16 node is created on
/// the stored value first and StoreParamI32 is used.
///
/// \returns true if \p N was replaced; false for any other opcode or when
/// pickOpcodeForVT has no opcode for the memory type.
bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Flag = N->getOperand(N->getNumOperands() - 1);

  const unsigned NodeOpc = N->getOpcode();
  const bool IsExtendingStore = NodeOpc == NVPTXISD::StoreParamU32 ||
                                NodeOpc == NVPTXISD::StoreParamS32;

  // The node opcode fixes how many values are being stored.
  unsigned NumElts;
  switch (NodeOpc) {
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  default:
    return false;
  }

  // Machine operands: stored values, parameter number, offset, chain, and the
  // node's last operand.
  SmallVector<SDValue, 8> Ops;
  for (unsigned Idx = 3, End = NumElts + 3; Idx != End; ++Idx)
    Ops.push_back(N->getOperand(Idx));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  Optional<unsigned> Opcode;
  if (IsExtendingStore) {
    // Special case: for a sign-extend/zero-extend node, insert the conversion
    // instruction first and use it as the value operand of the selected
    // StoreParam node.
    Opcode = NVPTX::StoreParamI32;
    const unsigned CvtOpc = NodeOpc == NVPTXISD::StoreParamU32
                                ? NVPTX::CVT_u32_u16
                                : NVPTX::CVT_s32_s16;
    SDValue CvtNone =
        CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
    SDNode *Cvt =
        CurDAG->getMachineNode(CvtOpc, DL, MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
  } else {
    // Pick the target opcode from the memory type. (Per the original note: an
    // i1 uses an 8-bit store, the lowering code in NVPTXISelLowering having
    // already emitted an upcast.)
    const MVT::SimpleValueType MemTy =
        Mem->getMemoryVT().getSimpleVT().SimpleTy;
    if (NumElts == 1) {
      Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamI8,
                               NVPTX::StoreParamI16, NVPTX::StoreParamI32,
                               NVPTX::StoreParamI64, NVPTX::StoreParamF16,
                               NVPTX::StoreParamF16x2, NVPTX::StoreParamF32,
                               NVPTX::StoreParamF64);
    } else if (NumElts == 2) {
      Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamV2I8,
                               NVPTX::StoreParamV2I16, NVPTX::StoreParamV2I32,
                               NVPTX::StoreParamV2I64, NVPTX::StoreParamV2F16,
                               NVPTX::StoreParamV2F16x2,
                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
    } else {
      // NumElts == 4: the i64 and f64 slots are None.
      Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamV4I8,
                               NVPTX::StoreParamV4I16, NVPTX::StoreParamV4I32,
                               None, NVPTX::StoreParamV4F16,
                               NVPTX::StoreParamV4F16x2,
                               NVPTX::StoreParamV4F32, None);
    }
    if (!Opcode)
      return false;
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
  // Carry the original node's memory operand over to the machine node.
  MachineMemOperand *MemRef = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}
2344
2345
5
bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2346
5
  unsigned Opc = 0;
2347
5
2348
5
  switch (N->getOpcode()) {
2349
5
  
default: return false0
;
2350
5
  case NVPTXISD::Tex1DFloatS32:
2351
1
    Opc = NVPTX::TEX_1D_F32_S32;
2352
1
    break;
2353
5
  case NVPTXISD::Tex1DFloatFloat:
2354
0
    Opc = NVPTX::TEX_1D_F32_F32;
2355
0
    break;
2356
5
  case NVPTXISD::Tex1DFloatFloatLevel:
2357
0
    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2358
0
    break;
2359
5
  case NVPTXISD::Tex1DFloatFloatGrad:
2360
0
    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2361
0
    break;
2362
5
  case NVPTXISD::Tex1DS32S32:
2363
0
    Opc = NVPTX::TEX_1D_S32_S32;
2364
0
    break;
2365
5
  case NVPTXISD::Tex1DS32Float:
2366
0
    Opc = NVPTX::TEX_1D_S32_F32;
2367
0
    break;
2368
5
  case NVPTXISD::Tex1DS32FloatLevel:
2369
0
    Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2370
0
    break;
2371
5
  case NVPTXISD::Tex1DS32FloatGrad:
2372
0
    Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2373
0
    break;
2374
5
  case NVPTXISD::Tex1DU32S32:
2375
0
    Opc = NVPTX::TEX_1D_U32_S32;
2376
0
    break;
2377
5
  case NVPTXISD::Tex1DU32Float:
2378
0
    Opc = NVPTX::TEX_1D_U32_F32;
2379
0
    break;
2380
5
  case NVPTXISD::Tex1DU32FloatLevel:
2381
0
    Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2382
0
    break;
2383
5
  case NVPTXISD::Tex1DU32FloatGrad:
2384
0
    Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2385
0
    break;
2386
5
  case NVPTXISD::Tex1DArrayFloatS32:
2387
0
    Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2388
0
    break;
2389
5
  case NVPTXISD::Tex1DArrayFloatFloat:
2390
0
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2391
0
    break;
2392
5
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
2393
0
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2394
0
    break;
2395
5
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
2396
0
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2397
0
    break;
2398
5
  case NVPTXISD::Tex1DArrayS32S32:
2399
0
    Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2400
0
    break;
2401
5
  case NVPTXISD::Tex1DArrayS32Float:
2402
0
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2403
0
    break;
2404
5
  case NVPTXISD::Tex1DArrayS32FloatLevel:
2405
0
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2406
0
    break;
2407
5
  case NVPTXISD::Tex1DArrayS32FloatGrad:
2408
0
    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2409
0
    break;
2410
5
  case NVPTXISD::Tex1DArrayU32S32:
2411
0
    Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2412
0
    break;
2413
5
  case NVPTXISD::Tex1DArrayU32Float:
2414
0
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2415
0
    break;
2416
5
  case NVPTXISD::Tex1DArrayU32FloatLevel:
2417
0
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2418
0
    break;
2419
5
  case NVPTXISD::Tex1DArrayU32FloatGrad:
2420
0
    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2421
0
    break;
2422
5
  case NVPTXISD::Tex2DFloatS32:
2423
0
    Opc = NVPTX::TEX_2D_F32_S32;
2424
0
    break;
2425
5
  case NVPTXISD::Tex2DFloatFloat:
2426
0
    Opc = NVPTX::TEX_2D_F32_F32;
2427
0
    break;
2428
5
  case NVPTXISD::Tex2DFloatFloatLevel:
2429
0
    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2430
0
    break;
2431
5
  case NVPTXISD::Tex2DFloatFloatGrad:
2432
0
    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2433
0
    break;
2434
5
  case NVPTXISD::Tex2DS32S32:
2435
0
    Opc = NVPTX::TEX_2D_S32_S32;
2436
0
    break;
2437
5
  case NVPTXISD::Tex2DS32Float:
2438
0
    Opc = NVPTX::TEX_2D_S32_F32;
2439
0
    break;
2440
5
  case NVPTXISD::Tex2DS32FloatLevel:
2441
0
    Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2442
0
    break;
2443
5
  case NVPTXISD::Tex2DS32FloatGrad:
2444
0
    Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2445
0
    break;
2446
5
  case NVPTXISD::Tex2DU32S32:
2447
0
    Opc = NVPTX::TEX_2D_U32_S32;
2448
0
    break;
2449
5
  case NVPTXISD::Tex2DU32Float:
2450
0
    Opc = NVPTX::TEX_2D_U32_F32;
2451
0
    break;
2452
5
  case NVPTXISD::Tex2DU32FloatLevel:
2453
0
    Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2454
0
    break;
2455
5
  case NVPTXISD::Tex2DU32FloatGrad:
2456
0
    Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2457
0
    break;
2458
5
  case NVPTXISD::Tex2DArrayFloatS32:
2459
0
    Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2460
0
    break;
2461
5
  case NVPTXISD::Tex2DArrayFloatFloat:
2462
0
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2463
0
    break;
2464
5
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
2465
0
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2466
0
    break;
2467
5
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
2468
0
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2469
0
    break;
2470
5
  case NVPTXISD::Tex2DArrayS32S32:
2471
0
    Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2472
0
    break;
2473
5
  case NVPTXISD::Tex2DArrayS32Float:
2474
0
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2475
0
    break;
2476
5
  case NVPTXISD::Tex2DArrayS32FloatLevel:
2477
0
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2478
0
    break;
2479
5
  case NVPTXISD::Tex2DArrayS32FloatGrad:
2480
0
    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2481
0
    break;
2482
5
  case NVPTXISD::Tex2DArrayU32S32:
2483
0
    Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2484
0
    break;
2485
5
  case NVPTXISD::Tex2DArrayU32Float:
2486
0
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2487
0
    break;
2488
5
  case NVPTXISD::Tex2DArrayU32FloatLevel:
2489
0
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2490
0
    break;
2491
5
  case NVPTXISD::Tex2DArrayU32FloatGrad:
2492
0
    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2493
0
    break;
2494
5
  case NVPTXISD::Tex3DFloatS32:
2495
0
    Opc = NVPTX::TEX_3D_F32_S32;
2496
0
    break;
2497
5
  case NVPTXISD::Tex3DFloatFloat:
2498
0
    Opc = NVPTX::TEX_3D_F32_F32;
2499
0
    break;
2500
5
  case NVPTXISD::Tex3DFloatFloatLevel:
2501
0
    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2502
0
    break;
2503
5
  case NVPTXISD::Tex3DFloatFloatGrad:
2504
0
    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2505
0
    break;
2506
5
  case NVPTXISD::Tex3DS32S32:
2507
0
    Opc = NVPTX::TEX_3D_S32_S32;
2508
0
    break;
2509
5
  case NVPTXISD::Tex3DS32Float:
2510
0
    Opc = NVPTX::TEX_3D_S32_F32;
2511
0
    break;
2512
5
  case NVPTXISD::Tex3DS32FloatLevel:
2513
0
    Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2514
0
    break;
2515
5
  case NVPTXISD::Tex3DS32FloatGrad:
2516
0
    Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2517
0
    break;
2518
5
  case NVPTXISD::Tex3DU32S32:
2519
0
    Opc = NVPTX::TEX_3D_U32_S32;
2520
0
    break;
2521
5
  case NVPTXISD::Tex3DU32Float:
2522
0
    Opc = NVPTX::TEX_3D_U32_F32;
2523
0
    break;
2524
5
  case NVPTXISD::Tex3DU32FloatLevel:
2525
0
    Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2526
0
    break;
2527
5
  case NVPTXISD::Tex3DU32FloatGrad:
2528
0
    Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2529
0
    break;
2530
5
  case NVPTXISD::TexCubeFloatFloat:
2531
0
    Opc = NVPTX::TEX_CUBE_F32_F32;
2532
0
    break;
2533
5
  case NVPTXISD::TexCubeFloatFloatLevel:
2534
0
    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2535
0
    break;
2536
5
  case NVPTXISD::TexCubeS32Float:
2537
0
    Opc = NVPTX::TEX_CUBE_S32_F32;
2538
0
    break;
2539
5
  case NVPTXISD::TexCubeS32FloatLevel:
2540
0
    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2541
0
    break;
2542
5
  case NVPTXISD::TexCubeU32Float:
2543
0
    Opc = NVPTX::TEX_CUBE_U32_F32;
2544
0
    break;
2545
5
  case NVPTXISD::TexCubeU32FloatLevel:
2546
0
    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2547
0
    break;
2548
5
  case NVPTXISD::TexCubeArrayFloatFloat:
2549
0
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2550
0
    break;
2551
5
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
2552
0
    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2553
0
    break;
2554
5
  case NVPTXISD::TexCubeArrayS32Float:
2555
0
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2556
0
    break;
2557
5
  case NVPTXISD::TexCubeArrayS32FloatLevel:
2558
0
    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2559
0
    break;
2560
5
  case NVPTXISD::TexCubeArrayU32Float:
2561
0
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2562
0
    break;
2563
5
  case NVPTXISD::TexCubeArrayU32FloatLevel:
2564
0
    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2565
0
    break;
2566
5
  case NVPTXISD::Tld4R2DFloatFloat:
2567
0
    Opc = NVPTX::TLD4_R_2D_F32_F32;
2568
0
    break;
2569
5
  case NVPTXISD::Tld4G2DFloatFloat:
2570
0
    Opc = NVPTX::TLD4_G_2D_F32_F32;
2571
0
    break;
2572
5
  case NVPTXISD::Tld4B2DFloatFloat:
2573
0
    Opc = NVPTX::TLD4_B_2D_F32_F32;
2574
0
    break;
2575
5
  case NVPTXISD::Tld4A2DFloatFloat:
2576
0
    Opc = NVPTX::TLD4_A_2D_F32_F32;
2577
0
    break;
2578
5
  case NVPTXISD::Tld4R2DS64Float:
2579
0
    Opc = NVPTX::TLD4_R_2D_S32_F32;
2580
0
    break;
2581
5
  case NVPTXISD::Tld4G2DS64Float:
2582
0
    Opc = NVPTX::TLD4_G_2D_S32_F32;
2583
0
    break;
2584
5
  case NVPTXISD::Tld4B2DS64Float:
2585
0
    Opc = NVPTX::TLD4_B_2D_S32_F32;
2586
0
    break;
2587
5
  case NVPTXISD::Tld4A2DS64Float:
2588
0
    Opc = NVPTX::TLD4_A_2D_S32_F32;
2589
0
    break;
2590
5
  case NVPTXISD::Tld4R2DU64Float:
2591
0
    Opc = NVPTX::TLD4_R_2D_U32_F32;
2592
0
    break;
2593
5
  case NVPTXISD::Tld4G2DU64Float:
2594
0
    Opc = NVPTX::TLD4_G_2D_U32_F32;
2595
0
    break;
2596
5
  case NVPTXISD::Tld4B2DU64Float:
2597
0
    Opc = NVPTX::TLD4_B_2D_U32_F32;
2598
0
    break;
2599
5
  case NVPTXISD::Tld4A2DU64Float:
2600
0
    Opc = NVPTX::TLD4_A_2D_U32_F32;
2601
0
    break;
2602
5
  case NVPTXISD::TexUnified1DFloatS32:
2603
4
    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2604
4
    break;
2605
5
  case NVPTXISD::TexUnified1DFloatFloat:
2606
0
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2607
0
    break;
2608
5
  case NVPTXISD::TexUnified1DFloatFloatLevel:
2609
0
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2610
0
    break;
2611
5
  case NVPTXISD::TexUnified1DFloatFloatGrad:
2612
0
    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2613
0
    break;
2614
5
  case NVPTXISD::TexUnified1DS32S32:
2615
0
    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2616
0
    break;
2617
5
  case NVPTXISD::TexUnified1DS32Float:
2618
0
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2619
0
    break;
2620
5
  case NVPTXISD::TexUnified1DS32FloatLevel:
2621
0
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2622
0
    break;
2623
5
  case NVPTXISD::TexUnified1DS32FloatGrad:
2624
0
    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2625
0
    break;
2626
5
  case NVPTXISD::TexUnified1DU32S32:
2627
0
    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2628
0
    break;
2629
5
  case NVPTXISD::TexUnified1DU32Float:
2630
0
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2631
0
    break;
2632
5
  case NVPTXISD::TexUnified1DU32FloatLevel:
2633
0
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2634
0
    break;
2635
5
  case NVPTXISD::TexUnified1DU32FloatGrad:
2636
0
    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2637
0
    break;
2638
5
  case NVPTXISD::TexUnified1DArrayFloatS32:
2639
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2640
0
    break;
2641
5
  case NVPTXISD::TexUnified1DArrayFloatFloat:
2642
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2643
0
    break;
2644
5
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2645
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2646
0
    break;
2647
5
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2648
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2649
0
    break;
2650
5
  case NVPTXISD::TexUnified1DArrayS32S32:
2651
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2652
0
    break;
2653
5
  case NVPTXISD::TexUnified1DArrayS32Float:
2654
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2655
0
    break;
2656
5
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2657
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2658
0
    break;
2659
5
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2660
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2661
0
    break;
2662
5
  case NVPTXISD::TexUnified1DArrayU32S32:
2663
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2664
0
    break;
2665
5
  case NVPTXISD::TexUnified1DArrayU32Float:
2666
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2667
0
    break;
2668
5
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2669
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2670
0
    break;
2671
5
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2672
0
    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2673
0
    break;
2674
5
  case NVPTXISD::TexUnified2DFloatS32:
2675
0
    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2676
0
    break;
2677
5
  case NVPTXISD::TexUnified2DFloatFloat:
2678
0
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2679
0
    break;
2680
5
  case NVPTXISD::TexUnified2DFloatFloatLevel:
2681
0
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2682
0
    break;
2683
5
  case NVPTXISD::TexUnified2DFloatFloatGrad:
2684
0
    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2685
0
    break;
2686
5
  case NVPTXISD::TexUnified2DS32S32:
2687
0
    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2688
0
    break;
2689
5
  case NVPTXISD::TexUnified2DS32Float:
2690
0
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2691
0
    break;
2692
5
  case NVPTXISD::TexUnified2DS32FloatLevel:
2693
0
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2694
0
    break;
2695
5
  case NVPTXISD::TexUnified2DS32FloatGrad:
2696
0
    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2697
0
    break;
2698
5
  case NVPTXISD::TexUnified2DU32S32:
2699
0
    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2700
0
    break;
2701
5
  case NVPTXISD::TexUnified2DU32Float:
2702
0
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2703
0
    break;
2704
5
  case NVPTXISD::TexUnified2DU32FloatLevel:
2705
0
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2706
0
    break;
2707
5
  case NVPTXISD::TexUnified2DU32FloatGrad:
2708
0
    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2709
0
    break;
2710
5
  case NVPTXISD::TexUnified2DArrayFloatS32:
2711
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2712
0
    break;
2713
5
  case NVPTXISD::TexUnified2DArrayFloatFloat:
2714
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2715
0
    break;
2716
5
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2717
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2718
0
    break;
2719
5
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2720
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2721
0
    break;
2722
5
  case NVPTXISD::TexUnified2DArrayS32S32:
2723
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2724
0
    break;
2725
5
  case NVPTXISD::TexUnified2DArrayS32Float:
2726
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2727
0
    break;
2728
5
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2729
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2730
0
    break;
2731
5
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2732
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2733
0
    break;
2734
5
  case NVPTXISD::TexUnified2DArrayU32S32:
2735
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2736
0
    break;
2737
5
  case NVPTXISD::TexUnified2DArrayU32Float:
2738
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2739
0
    break;
2740
5
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2741
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2742
0
    break;
2743
5
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2744
0
    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2745
0
    break;
2746
5
  case NVPTXISD::TexUnified3DFloatS32:
2747
0
    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2748
0
    break;
2749
5
  case NVPTXISD::TexUnified3DFloatFloat:
2750
0
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2751
0
    break;
2752
5
  case NVPTXISD::TexUnified3DFloatFloatLevel:
2753
0
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2754
0
    break;
2755
5
  case NVPTXISD::TexUnified3DFloatFloatGrad:
2756
0
    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2757
0
    break;
2758
5
  case NVPTXISD::TexUnified3DS32S32:
2759
0
    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2760
0
    break;
2761
5
  case NVPTXISD::TexUnified3DS32Float:
2762
0
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2763
0
    break;
2764
5
  case NVPTXISD::TexUnified3DS32FloatLevel:
2765
0
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2766
0
    break;
2767
5
  case NVPTXISD::TexUnified3DS32FloatGrad:
2768
0
    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2769
0
    break;
2770
5
  case NVPTXISD::TexUnified3DU32S32:
2771
0
    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2772
0
    break;
2773
5
  case NVPTXISD::TexUnified3DU32Float:
2774
0
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2775
0
    break;
2776
5
  case NVPTXISD::TexUnified3DU32FloatLevel:
2777
0
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2778
0
    break;
2779
5
  case NVPTXISD::TexUnified3DU32FloatGrad:
2780
0
    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2781
0
    break;
2782
5
  case NVPTXISD::TexUnifiedCubeFloatFloat:
2783
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2784
0
    break;
2785
5
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2786
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2787
0
    break;
2788
5
  case NVPTXISD::TexUnifiedCubeS32Float:
2789
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2790
0
    break;
2791
5
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2792
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2793
0
    break;
2794
5
  case NVPTXISD::TexUnifiedCubeU32Float:
2795
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2796
0
    break;
2797
5
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2798
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2799
0
    break;
2800
5
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2801
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2802
0
    break;
2803
5
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2804
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2805
0
    break;
2806
5
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
2807
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2808
0
    break;
2809
5
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2810
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2811
0
    break;
2812
5
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
2813
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2814
0
    break;
2815
5
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2816
0
    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2817
0
    break;
2818
5
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2819
0
    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2820
0
    break;
2821
5
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2822
0
    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2823
0
    break;
2824
5
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2825
0
    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2826
0
    break;
2827
5
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2828
0
    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2829
0
    break;
2830
5
  case NVPTXISD::Tld4UnifiedR2DS64Float:
2831
0
    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2832
0
    break;
2833
5
  case NVPTXISD::Tld4UnifiedG2DS64Float:
2834
0
    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2835
0
    break;
2836
5
  case NVPTXISD::Tld4UnifiedB2DS64Float:
2837
0
    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2838
0
    break;
2839
5
  case NVPTXISD::Tld4UnifiedA2DS64Float:
2840
0
    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2841
0
    break;
2842
5
  case NVPTXISD::Tld4UnifiedR2DU64Float:
2843
0
    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2844
0
    break;
2845
5
  case NVPTXISD::Tld4UnifiedG2DU64Float:
2846
0
    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2847
0
    break;
2848
5
  case NVPTXISD::Tld4UnifiedB2DU64Float:
2849
0
    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2850
0
    break;
2851
5
  case NVPTXISD::Tld4UnifiedA2DU64Float:
2852
0
    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2853
0
    break;
2854
5
  }
2855
5
2856
5
  // Copy over operands
2857
5
  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2858
5
  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2859
5
2860
5
  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2861
5
  return true;
2862
5
}
2863
2864
5
/// Select a surface-load (suld) intrinsic node.
///
/// Maps the NVPTXISD::Suld* opcode of \p N onto the NVPTX::SULD_* machine
/// opcode with the same geometry, element type and out-of-bounds mode, then
/// replaces \p N with a machine node whose chain operand has been moved from
/// the front of the operand list to the back.
///
/// \returns true if \p N was replaced, false if its opcode is not one of the
/// surface-load opcodes listed here.
bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
  unsigned Opc = 0;

// One table row: DAG opcode -> machine opcode.
#define NVPTX_SULD_CASE(DagOp, MachineOp)                                      \
  case NVPTXISD::DagOp:                                                        \
    Opc = NVPTX::MachineOp;                                                    \
    break;

  switch (N->getOpcode()) {
  default:
    return false;

    // --- .clamp ---------------------------------------------------------
    NVPTX_SULD_CASE(Suld1DI8Clamp, SULD_1D_I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DI16Clamp, SULD_1D_I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DI32Clamp, SULD_1D_I32_CLAMP)
    NVPTX_SULD_CASE(Suld1DI64Clamp, SULD_1D_I64_CLAMP)
    NVPTX_SULD_CASE(Suld1DV2I8Clamp, SULD_1D_V2I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DV2I16Clamp, SULD_1D_V2I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DV2I32Clamp, SULD_1D_V2I32_CLAMP)
    NVPTX_SULD_CASE(Suld1DV2I64Clamp, SULD_1D_V2I64_CLAMP)
    NVPTX_SULD_CASE(Suld1DV4I8Clamp, SULD_1D_V4I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DV4I16Clamp, SULD_1D_V4I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DV4I32Clamp, SULD_1D_V4I32_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayI8Clamp, SULD_1D_ARRAY_I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayI16Clamp, SULD_1D_ARRAY_I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayI32Clamp, SULD_1D_ARRAY_I32_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayI64Clamp, SULD_1D_ARRAY_I64_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV2I8Clamp, SULD_1D_ARRAY_V2I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV2I16Clamp, SULD_1D_ARRAY_V2I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV2I32Clamp, SULD_1D_ARRAY_V2I32_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV2I64Clamp, SULD_1D_ARRAY_V2I64_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV4I8Clamp, SULD_1D_ARRAY_V4I8_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV4I16Clamp, SULD_1D_ARRAY_V4I16_CLAMP)
    NVPTX_SULD_CASE(Suld1DArrayV4I32Clamp, SULD_1D_ARRAY_V4I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DI8Clamp, SULD_2D_I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DI16Clamp, SULD_2D_I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DI32Clamp, SULD_2D_I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DI64Clamp, SULD_2D_I64_CLAMP)
    NVPTX_SULD_CASE(Suld2DV2I8Clamp, SULD_2D_V2I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DV2I16Clamp, SULD_2D_V2I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DV2I32Clamp, SULD_2D_V2I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DV2I64Clamp, SULD_2D_V2I64_CLAMP)
    NVPTX_SULD_CASE(Suld2DV4I8Clamp, SULD_2D_V4I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DV4I16Clamp, SULD_2D_V4I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DV4I32Clamp, SULD_2D_V4I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayI8Clamp, SULD_2D_ARRAY_I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayI16Clamp, SULD_2D_ARRAY_I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayI32Clamp, SULD_2D_ARRAY_I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayI64Clamp, SULD_2D_ARRAY_I64_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV2I8Clamp, SULD_2D_ARRAY_V2I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV2I16Clamp, SULD_2D_ARRAY_V2I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV2I32Clamp, SULD_2D_ARRAY_V2I32_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV2I64Clamp, SULD_2D_ARRAY_V2I64_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV4I8Clamp, SULD_2D_ARRAY_V4I8_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV4I16Clamp, SULD_2D_ARRAY_V4I16_CLAMP)
    NVPTX_SULD_CASE(Suld2DArrayV4I32Clamp, SULD_2D_ARRAY_V4I32_CLAMP)
    NVPTX_SULD_CASE(Suld3DI8Clamp, SULD_3D_I8_CLAMP)
    NVPTX_SULD_CASE(Suld3DI16Clamp, SULD_3D_I16_CLAMP)
    NVPTX_SULD_CASE(Suld3DI32Clamp, SULD_3D_I32_CLAMP)
    NVPTX_SULD_CASE(Suld3DI64Clamp, SULD_3D_I64_CLAMP)
    NVPTX_SULD_CASE(Suld3DV2I8Clamp, SULD_3D_V2I8_CLAMP)
    NVPTX_SULD_CASE(Suld3DV2I16Clamp, SULD_3D_V2I16_CLAMP)
    NVPTX_SULD_CASE(Suld3DV2I32Clamp, SULD_3D_V2I32_CLAMP)
    NVPTX_SULD_CASE(Suld3DV2I64Clamp, SULD_3D_V2I64_CLAMP)
    NVPTX_SULD_CASE(Suld3DV4I8Clamp, SULD_3D_V4I8_CLAMP)
    NVPTX_SULD_CASE(Suld3DV4I16Clamp, SULD_3D_V4I16_CLAMP)
    NVPTX_SULD_CASE(Suld3DV4I32Clamp, SULD_3D_V4I32_CLAMP)

    // --- .trap ----------------------------------------------------------
    NVPTX_SULD_CASE(Suld1DI8Trap, SULD_1D_I8_TRAP)
    NVPTX_SULD_CASE(Suld1DI16Trap, SULD_1D_I16_TRAP)
    NVPTX_SULD_CASE(Suld1DI32Trap, SULD_1D_I32_TRAP)
    NVPTX_SULD_CASE(Suld1DI64Trap, SULD_1D_I64_TRAP)
    NVPTX_SULD_CASE(Suld1DV2I8Trap, SULD_1D_V2I8_TRAP)
    NVPTX_SULD_CASE(Suld1DV2I16Trap, SULD_1D_V2I16_TRAP)
    NVPTX_SULD_CASE(Suld1DV2I32Trap, SULD_1D_V2I32_TRAP)
    NVPTX_SULD_CASE(Suld1DV2I64Trap, SULD_1D_V2I64_TRAP)
    NVPTX_SULD_CASE(Suld1DV4I8Trap, SULD_1D_V4I8_TRAP)
    NVPTX_SULD_CASE(Suld1DV4I16Trap, SULD_1D_V4I16_TRAP)
    NVPTX_SULD_CASE(Suld1DV4I32Trap, SULD_1D_V4I32_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayI8Trap, SULD_1D_ARRAY_I8_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayI16Trap, SULD_1D_ARRAY_I16_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayI32Trap, SULD_1D_ARRAY_I32_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayI64Trap, SULD_1D_ARRAY_I64_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV2I8Trap, SULD_1D_ARRAY_V2I8_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV2I16Trap, SULD_1D_ARRAY_V2I16_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV2I32Trap, SULD_1D_ARRAY_V2I32_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV2I64Trap, SULD_1D_ARRAY_V2I64_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV4I8Trap, SULD_1D_ARRAY_V4I8_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV4I16Trap, SULD_1D_ARRAY_V4I16_TRAP)
    NVPTX_SULD_CASE(Suld1DArrayV4I32Trap, SULD_1D_ARRAY_V4I32_TRAP)
    NVPTX_SULD_CASE(Suld2DI8Trap, SULD_2D_I8_TRAP)
    NVPTX_SULD_CASE(Suld2DI16Trap, SULD_2D_I16_TRAP)
    NVPTX_SULD_CASE(Suld2DI32Trap, SULD_2D_I32_TRAP)
    NVPTX_SULD_CASE(Suld2DI64Trap, SULD_2D_I64_TRAP)
    NVPTX_SULD_CASE(Suld2DV2I8Trap, SULD_2D_V2I8_TRAP)
    NVPTX_SULD_CASE(Suld2DV2I16Trap, SULD_2D_V2I16_TRAP)
    NVPTX_SULD_CASE(Suld2DV2I32Trap, SULD_2D_V2I32_TRAP)
    NVPTX_SULD_CASE(Suld2DV2I64Trap, SULD_2D_V2I64_TRAP)
    NVPTX_SULD_CASE(Suld2DV4I8Trap, SULD_2D_V4I8_TRAP)
    NVPTX_SULD_CASE(Suld2DV4I16Trap, SULD_2D_V4I16_TRAP)
    NVPTX_SULD_CASE(Suld2DV4I32Trap, SULD_2D_V4I32_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayI8Trap, SULD_2D_ARRAY_I8_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayI16Trap, SULD_2D_ARRAY_I16_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayI32Trap, SULD_2D_ARRAY_I32_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayI64Trap, SULD_2D_ARRAY_I64_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV2I8Trap, SULD_2D_ARRAY_V2I8_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV2I16Trap, SULD_2D_ARRAY_V2I16_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV2I32Trap, SULD_2D_ARRAY_V2I32_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV2I64Trap, SULD_2D_ARRAY_V2I64_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV4I8Trap, SULD_2D_ARRAY_V4I8_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV4I16Trap, SULD_2D_ARRAY_V4I16_TRAP)
    NVPTX_SULD_CASE(Suld2DArrayV4I32Trap, SULD_2D_ARRAY_V4I32_TRAP)
    NVPTX_SULD_CASE(Suld3DI8Trap, SULD_3D_I8_TRAP)
    NVPTX_SULD_CASE(Suld3DI16Trap, SULD_3D_I16_TRAP)
    NVPTX_SULD_CASE(Suld3DI32Trap, SULD_3D_I32_TRAP)
    NVPTX_SULD_CASE(Suld3DI64Trap, SULD_3D_I64_TRAP)
    NVPTX_SULD_CASE(Suld3DV2I8Trap, SULD_3D_V2I8_TRAP)
    NVPTX_SULD_CASE(Suld3DV2I16Trap, SULD_3D_V2I16_TRAP)
    NVPTX_SULD_CASE(Suld3DV2I32Trap, SULD_3D_V2I32_TRAP)
    NVPTX_SULD_CASE(Suld3DV2I64Trap, SULD_3D_V2I64_TRAP)
    NVPTX_SULD_CASE(Suld3DV4I8Trap, SULD_3D_V4I8_TRAP)
    NVPTX_SULD_CASE(Suld3DV4I16Trap, SULD_3D_V4I16_TRAP)
    NVPTX_SULD_CASE(Suld3DV4I32Trap, SULD_3D_V4I32_TRAP)

    // --- .zero ----------------------------------------------------------
    NVPTX_SULD_CASE(Suld1DI8Zero, SULD_1D_I8_ZERO)
    NVPTX_SULD_CASE(Suld1DI16Zero, SULD_1D_I16_ZERO)
    NVPTX_SULD_CASE(Suld1DI32Zero, SULD_1D_I32_ZERO)
    NVPTX_SULD_CASE(Suld1DI64Zero, SULD_1D_I64_ZERO)
    NVPTX_SULD_CASE(Suld1DV2I8Zero, SULD_1D_V2I8_ZERO)
    NVPTX_SULD_CASE(Suld1DV2I16Zero, SULD_1D_V2I16_ZERO)
    NVPTX_SULD_CASE(Suld1DV2I32Zero, SULD_1D_V2I32_ZERO)
    NVPTX_SULD_CASE(Suld1DV2I64Zero, SULD_1D_V2I64_ZERO)
    NVPTX_SULD_CASE(Suld1DV4I8Zero, SULD_1D_V4I8_ZERO)
    NVPTX_SULD_CASE(Suld1DV4I16Zero, SULD_1D_V4I16_ZERO)
    NVPTX_SULD_CASE(Suld1DV4I32Zero, SULD_1D_V4I32_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayI8Zero, SULD_1D_ARRAY_I8_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayI16Zero, SULD_1D_ARRAY_I16_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayI32Zero, SULD_1D_ARRAY_I32_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayI64Zero, SULD_1D_ARRAY_I64_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV2I8Zero, SULD_1D_ARRAY_V2I8_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV2I16Zero, SULD_1D_ARRAY_V2I16_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV2I32Zero, SULD_1D_ARRAY_V2I32_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV2I64Zero, SULD_1D_ARRAY_V2I64_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV4I8Zero, SULD_1D_ARRAY_V4I8_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV4I16Zero, SULD_1D_ARRAY_V4I16_ZERO)
    NVPTX_SULD_CASE(Suld1DArrayV4I32Zero, SULD_1D_ARRAY_V4I32_ZERO)
    NVPTX_SULD_CASE(Suld2DI8Zero, SULD_2D_I8_ZERO)
    NVPTX_SULD_CASE(Suld2DI16Zero, SULD_2D_I16_ZERO)
    NVPTX_SULD_CASE(Suld2DI32Zero, SULD_2D_I32_ZERO)
    NVPTX_SULD_CASE(Suld2DI64Zero, SULD_2D_I64_ZERO)
    NVPTX_SULD_CASE(Suld2DV2I8Zero, SULD_2D_V2I8_ZERO)
    NVPTX_SULD_CASE(Suld2DV2I16Zero, SULD_2D_V2I16_ZERO)
    NVPTX_SULD_CASE(Suld2DV2I32Zero, SULD_2D_V2I32_ZERO)
    NVPTX_SULD_CASE(Suld2DV2I64Zero, SULD_2D_V2I64_ZERO)
    NVPTX_SULD_CASE(Suld2DV4I8Zero, SULD_2D_V4I8_ZERO)
    NVPTX_SULD_CASE(Suld2DV4I16Zero, SULD_2D_V4I16_ZERO)
    NVPTX_SULD_CASE(Suld2DV4I32Zero, SULD_2D_V4I32_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayI8Zero, SULD_2D_ARRAY_I8_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayI16Zero, SULD_2D_ARRAY_I16_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayI32Zero, SULD_2D_ARRAY_I32_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayI64Zero, SULD_2D_ARRAY_I64_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV2I8Zero, SULD_2D_ARRAY_V2I8_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV2I16Zero, SULD_2D_ARRAY_V2I16_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV2I32Zero, SULD_2D_ARRAY_V2I32_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV2I64Zero, SULD_2D_ARRAY_V2I64_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV4I8Zero, SULD_2D_ARRAY_V4I8_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV4I16Zero, SULD_2D_ARRAY_V4I16_ZERO)
    NVPTX_SULD_CASE(Suld2DArrayV4I32Zero, SULD_2D_ARRAY_V4I32_ZERO)
    NVPTX_SULD_CASE(Suld3DI8Zero, SULD_3D_I8_ZERO)
    NVPTX_SULD_CASE(Suld3DI16Zero, SULD_3D_I16_ZERO)
    NVPTX_SULD_CASE(Suld3DI32Zero, SULD_3D_I32_ZERO)
    NVPTX_SULD_CASE(Suld3DI64Zero, SULD_3D_I64_ZERO)
    NVPTX_SULD_CASE(Suld3DV2I8Zero, SULD_3D_V2I8_ZERO)
    NVPTX_SULD_CASE(Suld3DV2I16Zero, SULD_3D_V2I16_ZERO)
    NVPTX_SULD_CASE(Suld3DV2I32Zero, SULD_3D_V2I32_ZERO)
    NVPTX_SULD_CASE(Suld3DV2I64Zero, SULD_3D_V2I64_ZERO)
    NVPTX_SULD_CASE(Suld3DV4I8Zero, SULD_3D_V4I8_ZERO)
    NVPTX_SULD_CASE(Suld3DV4I16Zero, SULD_3D_V4I16_ZERO)
    NVPTX_SULD_CASE(Suld3DV4I32Zero, SULD_3D_V4I32_ZERO)
  }

#undef NVPTX_SULD_CASE

  // The DAG node carries its chain as operand 0; the machine node expects it
  // last, so copy operands 1..n and then append the chain.
  SmallVector<SDValue, 8> MachineOps(N->op_begin() + 1, N->op_end());
  MachineOps.push_back(N->getOperand(0));

  SDNode *Selected =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), MachineOps);
  ReplaceNode(N, Selected);
  return true;
}
3372
3373
3374
/// SelectBFE - Look for instruction sequences that can be made more efficient
3375
/// by using the 'bfe' (bit-field extract) PTX instruction
3376
240
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3377
240
  SDLoc DL(N);
3378
240
  SDValue LHS = N->getOperand(0);
3379
240
  SDValue RHS = N->getOperand(1);
3380
240
  SDValue Len;
3381
240
  SDValue Start;
3382
240
  SDValue Val;
3383
240
  bool IsSigned = false;
3384
240
3385
240
  if (N->getOpcode() == ISD::AND) {
3386
158
    // Canonicalize the operands
3387
158
    // We want 'and %val, %mask'
3388
158
    if (isa<ConstantSDNode>(LHS) && 
!isa<ConstantSDNode>(RHS)0
) {
3389
0
      std::swap(LHS, RHS);
3390
0
    }
3391
158
3392
158
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3393
158
    if (!Mask) {
3394
14
      // We need a constant mask on the RHS of the AND
3395
14
      return false;
3396
14
    }
3397
144
3398
144
    // Extract the mask bits
3399
144
    uint64_t MaskVal = Mask->getZExtValue();
3400
144
    if (!isMask_64(MaskVal)) {
3401
76
      // We *could* handle shifted masks here, but doing so would require an
3402
76
      // 'and' operation to fix up the low-order bits so we would trade
3403
76
      // shr+and for bfe+and, which has the same throughput
3404
76
      return false;
3405
76
    }
3406
68
3407
68
    // How many bits are in our mask?
3408
68
    uint64_t NumBits = countTrailingOnes(MaskVal);
3409
68
    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3410
68
3411
68
    if (LHS.getOpcode() == ISD::SRL || 
LHS.getOpcode() == ISD::SRA65
) {
3412
3
      // We have a 'srl/and' pair, extract the effective start bit and length
3413
3
      Val = LHS.getNode()->getOperand(0);
3414
3
      Start = LHS.getNode()->getOperand(1);
3415
3
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3416
3
      if (StartConst) {
3417
3
        uint64_t StartVal = StartConst->getZExtValue();
3418
3
        // How many "good" bits do we have left?  "good" is defined here as bits
3419
3
        // that exist in the original value, not shifted in.
3420
3
        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3421
3
        if (NumBits > GoodBits) {
3422
0
          // Do not handle the case where bits have been shifted in. In theory
3423
0
          // we could handle this, but the cost is likely higher than just
3424
0
          // emitting the srl/and pair.
3425
0
          return false;
3426
0
        }
3427
3
        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3428
3
      } else {
3429
0
        // Do not handle the case where the shift amount (can be zero if no srl
3430
0
        // was found) is not constant. We could handle this case, but it would
3431
0
        // require run-time logic that would be more expensive than just
3432
0
        // emitting the srl/and pair.
3433
0
        return false;
3434
0
      }
3435
65
    } else {
3436
65
      // Do not handle the case where the LHS of the and is not a shift. While
3437
65
      // it would be trivial to handle this case, it would just transform
3438
65
      // 'and' -> 'bfe', but 'and' has higher-throughput.
3439
65
      return false;
3440
65
    }
3441
82
  } else if (N->getOpcode() == ISD::SRL || 
N->getOpcode() == ISD::SRA27
) {
3442
82
    if (LHS->getOpcode() == ISD::AND) {
3443
20
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3444
20
      if (!ShiftCnst) {
3445
0
        // Shift amount must be constant
3446
0
        return false;
3447
0
      }
3448
20
3449
20
      uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3450
20
3451
20
      SDValue AndLHS = LHS->getOperand(0);
3452
20
      SDValue AndRHS = LHS->getOperand(1);
3453
20
3454
20
      // Canonicalize the AND to have the mask on the RHS
3455
20
      if (isa<ConstantSDNode>(AndLHS)) {
3456
0
        std::swap(AndLHS, AndRHS);
3457
0
      }
3458
20
3459
20
      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3460
20
      if (!MaskCnst) {
3461
0
        // Mask must be constant
3462
0
        return false;
3463
0
      }
3464
20
3465
20
      uint64_t MaskVal = MaskCnst->getZExtValue();
3466
20
      uint64_t NumZeros;
3467
20
      uint64_t NumBits;
3468
20
      if (isMask_64(MaskVal)) {
3469
0
        NumZeros = 0;
3470
0
        // The number of bits in the result bitfield will be the number of
3471
0
        // trailing ones (the AND) minus the number of bits we shift off
3472
0
        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3473
20
      } else if (isShiftedMask_64(MaskVal)) {
3474
20
        NumZeros = countTrailingZeros(MaskVal);
3475
20
        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3476
20
        // The number of bits in the result bitfield will be the number of
3477
20
        // trailing zeros plus the number of set bits in the mask minus the
3478
20
        // number of bits we shift off
3479
20
        NumBits = NumZeros + NumOnes - ShiftAmt;
3480
20
      } else {
3481
0
        // This is not a mask we can handle
3482
0
        return false;
3483
0
      }
3484
20
3485
20
      if (ShiftAmt < NumZeros) {
3486
20
        // Handling this case would require extra logic that would make this
3487
20
        // transformation non-profitable
3488
20
        return false;
3489
20
      }
3490
0
3491
0
      Val = AndLHS;
3492
0
      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3493
0
      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3494
62
    } else if (LHS->getOpcode() == ISD::SHL) {
3495
4
      // Here, we have a pattern like:
3496
4
      //
3497
4
      // (sra (shl val, NN), MM)
3498
4
      // or
3499
4
      // (srl (shl val, NN), MM)
3500
4
      //
3501
4
      // If MM >= NN, we can efficiently optimize this with bfe
3502
4
      Val = LHS->getOperand(0);
3503
4
3504
4
      SDValue ShlRHS = LHS->getOperand(1);
3505
4
      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3506
4
      if (!ShlCnst) {
3507
0
        // Shift amount must be constant
3508
0
        return false;
3509
0
      }
3510
4
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3511
4
3512
4
      SDValue ShrRHS = RHS;
3513
4
      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3514
4
      if (!ShrCnst) {
3515
0
        // Shift amount must be constant
3516
0
        return false;
3517
0
      }
3518
4
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3519
4
3520
4
      // To avoid extra codegen and be profitable, we need Outer >= Inner
3521
4
      if (OuterShiftAmt < InnerShiftAmt) {
3522
0
        return false;
3523
0
      }
3524
4
3525
4
      // If the outer shift is more than the type size, we have no bitfield to
3526
4
      // extract (since we also check that the inner shift is <= the outer shift
3527
4
      // then this also implies that the inner shift is < the type size)
3528
4
      if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3529
0
        return false;
3530
0
      }
3531
4
3532
4
      Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3533
4
                                        MVT::i32);
3534
4
      Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3535
4
                                      DL, MVT::i32);
3536
4
3537
4
      if (N->getOpcode() == ISD::SRA) {
3538
4
        // If we have a arithmetic right shift, we need to use the signed bfe
3539
4
        // variant
3540
4
        IsSigned = true;
3541
4
      }
3542
58
    } else {
3543
58
      // No can do...
3544
58
      return false;
3545
58
    }
3546
0
  } else {
3547
0
    // No can do...
3548
0
    return false;
3549
0
  }
3550
7
3551
7
3552
7
  unsigned Opc;
3553
7
  // For the BFE operations we form here from "and" and "srl", always use the
3554
7
  // unsigned variants.
3555
7
  if (Val.getValueType() == MVT::i32) {
3556
5
    if (IsSigned) {
3557
2
      Opc = NVPTX::BFE_S32rii;
3558
3
    } else {
3559
3
      Opc = NVPTX::BFE_U32rii;
3560
3
    }
3561
5
  } else 
if (2
Val.getValueType() == MVT::i642
) {
3562
2
    if (IsSigned) {
3563
2
      Opc = NVPTX::BFE_S64rii;
3564
2
    } else {
3565
0
      Opc = NVPTX::BFE_U64rii;
3566
0
    }
3567
2
  } else {
3568
0
    // We cannot handle this type
3569
0
    return false;
3570
0
  }
3571
7
3572
7
  SDValue Ops[] = {
3573
7
    Val, Start, Len
3574
7
  };
3575
7
3576
7
  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3577
7
  return true;
3578
7
}
3579
3580
// SelectDirectAddr - Match a direct address for DAG.
3581
// A direct address could be a globaladdress or externalsymbol.
3582
4.01k
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3583
4.01k
  // Return true if TGA or ES.
3584
4.01k
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3585
4.01k
      N.getOpcode() == ISD::TargetExternalSymbol) {
3586
2.70k
    Address = N;
3587
2.70k
    return true;
3588
2.70k
  }
3589
1.31k
  if (N.getOpcode() == NVPTXISD::Wrapper) {
3590
41
    Address = N.getOperand(0);
3591
41
    return true;
3592
41
  }
3593
1.26k
  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3594
1.26k
  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3595
185
    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3596
185
        
CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM173
&&
3597
185
        
CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam4
)
3598
4
      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3599
1.26k
  }
3600
1.26k
  return false;
3601
1.26k
}
3602
3603
// symbol+offset
3604
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3605
892
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3606
892
  if (Addr.getOpcode() == ISD::ADD) {
3607
270
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3608
233
      SDValue base = Addr.getOperand(0);
3609
233
      if (SelectDirectAddr(base, Base)) {
3610
84
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3611
84
                                           mvt);
3612
84
        return true;
3613
84
      }
3614
808
    }
3615
270
  }
3616
808
  return false;
3617
808
}
3618
3619
// symbol+offset
3620
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3621
349
                                     SDValue &Base, SDValue &Offset) {
3622
349
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3623
349
}
3624
3625
// symbol+offset
3626
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3627
543
                                       SDValue &Base, SDValue &Offset) {
3628
543
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3629
543
}
3630
3631
// register+offset
3632
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3633
860
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3634
860
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3635
98
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3636
98
    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3637
98
    return true;
3638
98
  }
3639
762
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3640
762
      Addr.getOpcode() == ISD::TargetGlobalAddress)
3641
0
    return false; // direct calls.
3642
762
3643
762
  if (Addr.getOpcode() == ISD::ADD) {
3644
191
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3645
6
      return false;
3646
6
    }
3647
185
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3648
154
      if (FrameIndexSDNode *FIN =
3649
0
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3650
0
        // Constant offset from frame ref.
3651
0
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3652
154
      else
3653
154
        Base = Addr.getOperand(0);
3654
154
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3655
154
                                         mvt);
3656
154
      return true;
3657
154
    }
3658
602
  }
3659
602
  return false;
3660
602
}
3661
3662
// register+offset
3663
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3664
346
                                     SDValue &Base, SDValue &Offset) {
3665
346
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3666
346
}
3667
3668
// register+offset
3669
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3670
514
                                       SDValue &Base, SDValue &Offset) {
3671
514
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3672
514
}
3673
3674
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3675
129
                                                 unsigned int spN) const {
3676
129
  const Value *Src = nullptr;
3677
129
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3678
129
    if (spN == 0 && 
mN->getMemOperand()->getPseudoValue()38
)
3679
0
      return true;
3680
129
    Src = mN->getMemOperand()->getValue();
3681
129
  }
3682
129
  if (!Src)
3683
0
    return false;
3684
129
  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3685
129
    return (PT->getAddressSpace() == spN);
3686
0
  return false;
3687
0
}
3688
3689
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3690
/// inline asm expressions.
3691
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3692
0
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3693
0
  SDValue Op0, Op1;
3694
0
  switch (ConstraintID) {
3695
0
  default:
3696
0
    return true;
3697
0
  case InlineAsm::Constraint_m: // memory
3698
0
    if (SelectDirectAddr(Op, Op0)) {
3699
0
      OutOps.push_back(Op0);
3700
0
      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3701
0
      return false;
3702
0
    }
3703
0
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3704
0
      OutOps.push_back(Op0);
3705
0
      OutOps.push_back(Op1);
3706
0
      return false;
3707
0
    }
3708
0
    break;
3709
0
  }
3710
0
  return true;
3711
0
}
3712
3713
/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3714
/// conversion from \p SrcTy to \p DestTy.
3715
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3716
3
                                             bool IsSigned) {
3717
3
  switch (SrcTy.SimpleTy) {
3718
3
  default:
3719
0
    llvm_unreachable("Unhandled source type");
3720
3
  case MVT::i8:
3721
2
    switch (DestTy.SimpleTy) {
3722
2
    default:
3723
0
      llvm_unreachable("Unhandled dest type");
3724
2
    case MVT::i16:
3725
0
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3726
2
    case MVT::i32:
3727
2
      return IsSigned ? 
NVPTX::CVT_s32_s81
:
NVPTX::CVT_u32_u81
;
3728
2
    case MVT::i64:
3729
0
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3730
0
    }
3731
1
  case MVT::i16:
3732
1
    switch (DestTy.SimpleTy) {
3733
1
    default:
3734
0
      llvm_unreachable("Unhandled dest type");
3735
1
    case MVT::i8:
3736
0
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3737
1
    case MVT::i32:
3738
1
      return IsSigned ? NVPTX::CVT_s32_s16 : 
NVPTX::CVT_u32_u160
;
3739
1
    case MVT::i64:
3740
0
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3741
0
    }
3742
0
  case MVT::i32:
3743
0
    switch (DestTy.SimpleTy) {
3744
0
    default:
3745
0
      llvm_unreachable("Unhandled dest type");
3746
0
    case MVT::i8:
3747
0
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3748
0
    case MVT::i16:
3749
0
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3750
0
    case MVT::i64:
3751
0
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3752
0
    }
3753
0
  case MVT::i64:
3754
0
    switch (DestTy.SimpleTy) {
3755
0
    default:
3756
0
      llvm_unreachable("Unhandled dest type");
3757
0
    case MVT::i8:
3758
0
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3759
0
    case MVT::i16:
3760
0
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3761
0
    case MVT::i32:
3762
0
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3763
0
    }
3764
3
  }
3765
3
}