Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Subclass of MipsTargetLowering specialized for mips32/64.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "MipsSEISelLowering.h"
14
#include "MipsMachineFunction.h"
15
#include "MipsRegisterInfo.h"
16
#include "MipsSubtarget.h"
17
#include "llvm/ADT/APInt.h"
18
#include "llvm/ADT/ArrayRef.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Triple.h"
22
#include "llvm/CodeGen/CallingConvLower.h"
23
#include "llvm/CodeGen/ISDOpcodes.h"
24
#include "llvm/CodeGen/MachineBasicBlock.h"
25
#include "llvm/CodeGen/MachineFunction.h"
26
#include "llvm/CodeGen/MachineInstr.h"
27
#include "llvm/CodeGen/MachineInstrBuilder.h"
28
#include "llvm/CodeGen/MachineMemOperand.h"
29
#include "llvm/CodeGen/MachineRegisterInfo.h"
30
#include "llvm/CodeGen/SelectionDAG.h"
31
#include "llvm/CodeGen/SelectionDAGNodes.h"
32
#include "llvm/CodeGen/TargetInstrInfo.h"
33
#include "llvm/CodeGen/TargetSubtargetInfo.h"
34
#include "llvm/CodeGen/ValueTypes.h"
35
#include "llvm/IR/DebugLoc.h"
36
#include "llvm/IR/Intrinsics.h"
37
#include "llvm/Support/Casting.h"
38
#include "llvm/Support/CommandLine.h"
39
#include "llvm/Support/Debug.h"
40
#include "llvm/Support/ErrorHandling.h"
41
#include "llvm/Support/MachineValueType.h"
42
#include "llvm/Support/MathExtras.h"
43
#include "llvm/Support/raw_ostream.h"
44
#include <algorithm>
45
#include <cassert>
46
#include <cstdint>
47
#include <iterator>
48
#include <utility>
49
50
using namespace llvm;
51
52
#define DEBUG_TYPE "mips-isel"
53
54
static cl::opt<bool>
55
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
56
                    cl::desc("MIPS: permit tail calls."), cl::init(false));
57
58
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
59
                                   cl::desc("Expand double precision loads and "
60
                                            "stores to their single precision "
61
                                            "counterparts"));
62
63
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
64
                                           const MipsSubtarget &STI)
65
8.43k
    : MipsTargetLowering(TM, STI) {
66
8.43k
  // Set up the register classes
67
8.43k
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
68
8.43k
69
8.43k
  if (Subtarget.isGP64bit())
70
2.96k
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
71
8.43k
72
8.43k
  if (Subtarget.hasDSP() || 
Subtarget.hasMSA()8.34k
) {
73
851
    // Expand all truncating stores and extending loads.
74
94.4k
    for (MVT VT0 : MVT::vector_valuetypes()) {
75
10.4M
      for (MVT VT1 : MVT::vector_valuetypes()) {
76
10.4M
        setTruncStoreAction(VT0, VT1, Expand);
77
10.4M
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
78
10.4M
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
79
10.4M
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
80
10.4M
      }
81
94.4k
    }
82
851
  }
83
8.43k
84
8.43k
  if (Subtarget.hasDSP()) {
85
86
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
86
86
87
258
    for (unsigned i = 0; i < array_lengthof(VecTys); 
++i172
) {
88
172
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
89
172
90
172
      // Expand all builtin opcodes.
91
49.3k
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; 
++Opc49.1k
)
92
49.1k
        setOperationAction(Opc, VecTys[i], Expand);
93
172
94
172
      setOperationAction(ISD::ADD, VecTys[i], Legal);
95
172
      setOperationAction(ISD::SUB, VecTys[i], Legal);
96
172
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
97
172
      setOperationAction(ISD::STORE, VecTys[i], Legal);
98
172
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
99
172
    }
100
86
101
86
    setTargetDAGCombine(ISD::SHL);
102
86
    setTargetDAGCombine(ISD::SRA);
103
86
    setTargetDAGCombine(ISD::SRL);
104
86
    setTargetDAGCombine(ISD::SETCC);
105
86
    setTargetDAGCombine(ISD::VSELECT);
106
86
107
86
    if (Subtarget.hasMips32r2()) {
108
38
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
109
38
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
110
38
    }
111
86
  }
112
8.43k
113
8.43k
  if (Subtarget.hasDSPR2())
114
20
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);
115
8.43k
116
8.43k
  if (Subtarget.hasMSA()) {
117
765
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
118
765
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
119
765
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
120
765
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
121
765
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
122
765
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
123
765
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
124
765
125
765
    // f16 is a storage-only type, always promote it to f32.
126
765
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
127
765
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
128
765
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
129
765
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
130
765
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
131
765
    setOperationAction(ISD::FADD, MVT::f16, Promote);
132
765
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
133
765
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
134
765
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
135
765
    setOperationAction(ISD::FREM, MVT::f16, Promote);
136
765
    setOperationAction(ISD::FMA, MVT::f16, Promote);
137
765
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
138
765
    setOperationAction(ISD::FABS, MVT::f16, Promote);
139
765
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
140
765
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
141
765
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
142
765
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
143
765
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
144
765
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
145
765
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
146
765
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
147
765
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
148
765
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
149
765
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
150
765
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
151
765
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
152
765
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
153
765
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
154
765
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
155
765
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
156
765
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
157
765
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
158
765
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
159
765
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
160
765
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
161
765
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
162
765
163
765
    setTargetDAGCombine(ISD::AND);
164
765
    setTargetDAGCombine(ISD::OR);
165
765
    setTargetDAGCombine(ISD::SRA);
166
765
    setTargetDAGCombine(ISD::VSELECT);
167
765
    setTargetDAGCombine(ISD::XOR);
168
765
  }
169
8.43k
170
8.43k
  if (!Subtarget.useSoftFloat()) {
171
8.24k
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
172
8.24k
173
8.24k
    // When dealing with single precision only, use libcalls
174
8.24k
    if (!Subtarget.isSingleFloat()) {
175
8.22k
      if (Subtarget.isFP64bit())
176
4.45k
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
177
3.76k
      else
178
3.76k
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
179
8.22k
    }
180
8.24k
  }
181
8.43k
182
8.43k
  setOperationAction(ISD::SMUL_LOHI,          MVT::i32, Custom);
183
8.43k
  setOperationAction(ISD::UMUL_LOHI,          MVT::i32, Custom);
184
8.43k
  setOperationAction(ISD::MULHS,              MVT::i32, Custom);
185
8.43k
  setOperationAction(ISD::MULHU,              MVT::i32, Custom);
186
8.43k
187
8.43k
  if (Subtarget.hasCnMips())
188
16
    setOperationAction(ISD::MUL,              MVT::i64, Legal);
189
8.41k
  else if (Subtarget.isGP64bit())
190
2.94k
    setOperationAction(ISD::MUL,              MVT::i64, Custom);
191
8.43k
192
8.43k
  if (Subtarget.isGP64bit()) {
193
2.96k
    setOperationAction(ISD::SMUL_LOHI,        MVT::i64, Custom);
194
2.96k
    setOperationAction(ISD::UMUL_LOHI,        MVT::i64, Custom);
195
2.96k
    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
196
2.96k
    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
197
2.96k
    setOperationAction(ISD::SDIVREM,          MVT::i64, Custom);
198
2.96k
    setOperationAction(ISD::UDIVREM,          MVT::i64, Custom);
199
2.96k
  }
200
8.43k
201
8.43k
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
202
8.43k
  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::i64, Custom);
203
8.43k
204
8.43k
  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
205
8.43k
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
206
8.43k
  setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
207
8.43k
  setOperationAction(ISD::LOAD,               MVT::i32, Custom);
208
8.43k
  setOperationAction(ISD::STORE,              MVT::i32, Custom);
209
8.43k
210
8.43k
  setTargetDAGCombine(ISD::MUL);
211
8.43k
212
8.43k
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
213
8.43k
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
214
8.43k
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
215
8.43k
216
8.43k
  if (Subtarget.hasMips32r2() && 
!Subtarget.useSoftFloat()4.29k
&&
217
8.43k
      
!Subtarget.hasMips64()4.25k
) {
218
2.86k
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
219
2.86k
  }
220
8.43k
221
8.43k
  if (NoDPLoadStore) {
222
52
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
223
52
    setOperationAction(ISD::STORE, MVT::f64, Custom);
224
52
  }
225
8.43k
226
8.43k
  if (Subtarget.hasMips32r6()) {
227
1.15k
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
228
1.15k
    // instruction
229
1.15k
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
230
1.15k
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
231
1.15k
    setOperationAction(ISD::MUL, MVT::i32, Legal);
232
1.15k
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
233
1.15k
    setOperationAction(ISD::MULHU, MVT::i32, Legal);
234
1.15k
235
1.15k
    // MIPS32r6 replaces the accumulator-based division/remainder with separate
236
1.15k
    // three register division and remainder instructions.
237
1.15k
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
238
1.15k
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
239
1.15k
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
240
1.15k
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
241
1.15k
    setOperationAction(ISD::SREM, MVT::i32, Legal);
242
1.15k
    setOperationAction(ISD::UREM, MVT::i32, Legal);
243
1.15k
244
1.15k
    // MIPS32r6 replaces conditional moves with an equivalent that removes the
245
1.15k
    // need for three GPR read ports.
246
1.15k
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
247
1.15k
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
248
1.15k
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
249
1.15k
250
1.15k
    setOperationAction(ISD::SETCC, MVT::f32, Legal);
251
1.15k
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
252
1.15k
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
253
1.15k
254
1.15k
    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
255
1.15k
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
256
1.15k
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
257
1.15k
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
258
1.15k
259
1.15k
    setOperationAction(ISD::BRCOND, MVT::Other, Legal);
260
1.15k
261
1.15k
    // Floating point > and >= are supported via < and <=
262
1.15k
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
263
1.15k
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
264
1.15k
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
265
1.15k
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
266
1.15k
267
1.15k
    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
268
1.15k
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
269
1.15k
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
270
1.15k
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
271
1.15k
  }
272
8.43k
273
8.43k
  if (Subtarget.hasMips64r6()) {
274
424
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
275
424
    // instruction
276
424
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
277
424
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
278
424
    setOperationAction(ISD::MUL, MVT::i64, Legal);
279
424
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
280
424
    setOperationAction(ISD::MULHU, MVT::i64, Legal);
281
424
282
424
    // MIPS32r6 replaces the accumulator-based division/remainder with separate
283
424
    // three register division and remainder instructions.
284
424
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
285
424
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
286
424
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
287
424
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
288
424
    setOperationAction(ISD::SREM, MVT::i64, Legal);
289
424
    setOperationAction(ISD::UREM, MVT::i64, Legal);
290
424
291
424
    // MIPS64r6 replaces conditional moves with an equivalent that removes the
292
424
    // need for three GPR read ports.
293
424
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
294
424
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
295
424
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
296
424
  }
297
8.43k
298
8.43k
  computeRegisterProperties(Subtarget.getRegisterInfo());
299
8.43k
}
300
301
const MipsTargetLowering *
302
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
303
8.43k
                                 const MipsSubtarget &STI) {
304
8.43k
  return new MipsSETargetLowering(TM, STI);
305
8.43k
}
306
307
const TargetRegisterClass *
308
413k
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
309
413k
  if (VT == MVT::Untyped)
310
1.22k
    return Subtarget.hasDSP() ? 
&Mips::ACC64DSPRegClass172
:
&Mips::ACC64RegClass1.04k
;
311
412k
312
412k
  return TargetLowering::getRepRegClassFor(VT);
313
412k
}
314
315
// Enable MSA support for the given integer type and Register class.
316
void MipsSETargetLowering::
317
3.06k
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
318
3.06k
  addRegisterClass(Ty, RC);
319
3.06k
320
3.06k
  // Expand all builtin opcodes.
321
878k
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; 
++Opc875k
)
322
875k
    setOperationAction(Opc, Ty, Expand);
323
3.06k
324
3.06k
  setOperationAction(ISD::BITCAST, Ty, Legal);
325
3.06k
  setOperationAction(ISD::LOAD, Ty, Legal);
326
3.06k
  setOperationAction(ISD::STORE, Ty, Legal);
327
3.06k
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
328
3.06k
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
329
3.06k
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
330
3.06k
331
3.06k
  setOperationAction(ISD::ADD, Ty, Legal);
332
3.06k
  setOperationAction(ISD::AND, Ty, Legal);
333
3.06k
  setOperationAction(ISD::CTLZ, Ty, Legal);
334
3.06k
  setOperationAction(ISD::CTPOP, Ty, Legal);
335
3.06k
  setOperationAction(ISD::MUL, Ty, Legal);
336
3.06k
  setOperationAction(ISD::OR, Ty, Legal);
337
3.06k
  setOperationAction(ISD::SDIV, Ty, Legal);
338
3.06k
  setOperationAction(ISD::SREM, Ty, Legal);
339
3.06k
  setOperationAction(ISD::SHL, Ty, Legal);
340
3.06k
  setOperationAction(ISD::SRA, Ty, Legal);
341
3.06k
  setOperationAction(ISD::SRL, Ty, Legal);
342
3.06k
  setOperationAction(ISD::SUB, Ty, Legal);
343
3.06k
  setOperationAction(ISD::SMAX, Ty, Legal);
344
3.06k
  setOperationAction(ISD::SMIN, Ty, Legal);
345
3.06k
  setOperationAction(ISD::UDIV, Ty, Legal);
346
3.06k
  setOperationAction(ISD::UREM, Ty, Legal);
347
3.06k
  setOperationAction(ISD::UMAX, Ty, Legal);
348
3.06k
  setOperationAction(ISD::UMIN, Ty, Legal);
349
3.06k
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
350
3.06k
  setOperationAction(ISD::VSELECT, Ty, Legal);
351
3.06k
  setOperationAction(ISD::XOR, Ty, Legal);
352
3.06k
353
3.06k
  if (Ty == MVT::v4i32 || 
Ty == MVT::v2i642.29k
) {
354
1.53k
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
355
1.53k
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
356
1.53k
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
357
1.53k
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
358
1.53k
  }
359
3.06k
360
3.06k
  setOperationAction(ISD::SETCC, Ty, Legal);
361
3.06k
  setCondCodeAction(ISD::SETNE, Ty, Expand);
362
3.06k
  setCondCodeAction(ISD::SETGE, Ty, Expand);
363
3.06k
  setCondCodeAction(ISD::SETGT, Ty, Expand);
364
3.06k
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
365
3.06k
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
366
3.06k
}
367
368
// Enable MSA support for the given floating-point type and Register class.
369
void MipsSETargetLowering::
370
2.29k
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
371
2.29k
  addRegisterClass(Ty, RC);
372
2.29k
373
2.29k
  // Expand all builtin opcodes.
374
658k
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; 
++Opc656k
)
375
656k
    setOperationAction(Opc, Ty, Expand);
376
2.29k
377
2.29k
  setOperationAction(ISD::LOAD, Ty, Legal);
378
2.29k
  setOperationAction(ISD::STORE, Ty, Legal);
379
2.29k
  setOperationAction(ISD::BITCAST, Ty, Legal);
380
2.29k
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
381
2.29k
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
382
2.29k
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
383
2.29k
384
2.29k
  if (Ty != MVT::v8f16) {
385
1.53k
    setOperationAction(ISD::FABS,  Ty, Legal);
386
1.53k
    setOperationAction(ISD::FADD,  Ty, Legal);
387
1.53k
    setOperationAction(ISD::FDIV,  Ty, Legal);
388
1.53k
    setOperationAction(ISD::FEXP2, Ty, Legal);
389
1.53k
    setOperationAction(ISD::FLOG2, Ty, Legal);
390
1.53k
    setOperationAction(ISD::FMA,   Ty, Legal);
391
1.53k
    setOperationAction(ISD::FMUL,  Ty, Legal);
392
1.53k
    setOperationAction(ISD::FRINT, Ty, Legal);
393
1.53k
    setOperationAction(ISD::FSQRT, Ty, Legal);
394
1.53k
    setOperationAction(ISD::FSUB,  Ty, Legal);
395
1.53k
    setOperationAction(ISD::VSELECT, Ty, Legal);
396
1.53k
397
1.53k
    setOperationAction(ISD::SETCC, Ty, Legal);
398
1.53k
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
399
1.53k
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
400
1.53k
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
401
1.53k
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
402
1.53k
    setCondCodeAction(ISD::SETGE,  Ty, Expand);
403
1.53k
    setCondCodeAction(ISD::SETGT,  Ty, Expand);
404
1.53k
  }
405
2.29k
}
406
407
1.87k
SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
408
1.87k
  if(!Subtarget.hasMips32r6())
409
1.84k
    return MipsTargetLowering::LowerOperation(Op, DAG);
410
35
411
35
  EVT ResTy = Op->getValueType(0);
412
35
  SDLoc DL(Op);
413
35
414
35
  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
415
35
  // floating point register are undefined. Not really an issue as sel.d, which
416
35
  // is produced from an FSELECT node, only looks at bit 0.
417
35
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
418
35
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
419
35
                     Op->getOperand(2));
420
35
}
421
422
bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
423
286
    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
424
286
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
425
286
426
286
  if (Subtarget.systemSupportsUnalignedAccess()) {
427
76
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
428
76
    // implementation defined whether this is handled by hardware, software, or
429
76
    // a hybrid of the two but it's expected that most implementations will
430
76
    // handle the majority of cases in hardware.
431
76
    if (Fast)
432
20
      *Fast = true;
433
76
    return true;
434
76
  }
435
210
436
210
  switch (SVT) {
437
210
  case MVT::i64:
438
41
  case MVT::i32:
439
41
    if (Fast)
440
41
      *Fast = true;
441
41
    return true;
442
169
  default:
443
169
    return false;
444
210
  }
445
210
}
446
447
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
448
44.4k
                                             SelectionDAG &DAG) const {
449
44.4k
  switch(Op.getOpcode()) {
450
44.4k
  
case ISD::LOAD: return lowerLOAD(Op, DAG)17.7k
;
451
44.4k
  
case ISD::STORE: return lowerSTORE(Op, DAG)7.67k
;
452
44.4k
  
case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG)0
;
453
44.4k
  
case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG)14
;
454
44.4k
  
case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG)5
;
455
44.4k
  
case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG)0
;
456
44.4k
  
case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG)54
;
457
44.4k
  
case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG)144
;
458
44.4k
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
459
142
                                          DAG);
460
44.4k
  
case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG)2.87k
;
461
44.4k
  
case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG)215
;
462
44.4k
  
case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG)102
;
463
44.4k
  
case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG)554
;
464
44.4k
  
case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG)3.38k
;
465
44.4k
  
case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG)206
;
466
44.4k
  
case ISD::SELECT: return lowerSELECT(Op, DAG)1.87k
;
467
44.4k
  
case ISD::BITCAST: return lowerBITCAST(Op, DAG)10
;
468
9.43k
  }
469
9.43k
470
9.43k
  return MipsTargetLowering::LowerOperation(Op, DAG);
471
9.43k
}
472
473
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
474
//
475
// Performs the following transformations:
476
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
477
//   sign/zero-extension is completely overwritten by the new one performed by
478
//   the ISD::AND.
479
// - Removes redundant zero extensions performed by an ISD::AND.
480
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
481
                                 TargetLowering::DAGCombinerInfo &DCI,
482
3.85k
                                 const MipsSubtarget &Subtarget) {
483
3.85k
  if (!Subtarget.hasMSA())
484
3.45k
    return SDValue();
485
397
486
397
  SDValue Op0 = N->getOperand(0);
487
397
  SDValue Op1 = N->getOperand(1);
488
397
  unsigned Op0Opcode = Op0->getOpcode();
489
397
490
397
  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
491
397
  // where $d + 1 == 2^n and n == 32
492
397
  // or    $d + 1 == 2^n and n <= 32 and ZExt
493
397
  // -> (MipsVExtractZExt $a, $b, $c)
494
397
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
495
397
      
Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT361
) {
496
36
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
497
36
498
36
    if (!Mask)
499
0
      return SDValue();
500
36
501
36
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
502
36
503
36
    if (Log2IfPositive <= 0)
504
0
      return SDValue(); // Mask+1 is not a power of 2
505
36
506
36
    SDValue Op0Op2 = Op0->getOperand(2);
507
36
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
508
36
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
509
36
    unsigned Log2 = Log2IfPositive;
510
36
511
36
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && 
Log2 >= ExtendTySize0
) ||
512
36
        Log2 == ExtendTySize) {
513
24
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
514
24
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
515
24
                         Op0->getVTList(),
516
24
                         makeArrayRef(Ops, Op0->getNumOperands()));
517
24
    }
518
373
  }
519
373
520
373
  return SDValue();
521
373
}
522
523
// Determine if the specified node is a constant vector splat.
524
//
525
// Returns true and sets Imm if:
526
// * N is a ISD::BUILD_VECTOR representing a constant splat
527
//
528
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
529
// differences are that it assumes the MSA has already been checked and the
530
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
531
// must not be in order for binsri.d to be selectable).
532
96
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
533
96
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
534
96
535
96
  if (!Node)
536
52
    return false;
537
44
538
44
  APInt SplatValue, SplatUndef;
539
44
  unsigned SplatBitSize;
540
44
  bool HasAnyUndefs;
541
44
542
44
  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
543
44
                             8, !IsLittleEndian))
544
0
    return false;
545
44
546
44
  Imm = SplatValue;
547
44
548
44
  return true;
549
44
}
550
551
// Test whether the given node is an all-ones build_vector.
552
16
static bool isVectorAllOnes(SDValue N) {
553
16
  // Look through bitcasts. Endianness doesn't matter because we are looking
554
16
  // for an all-ones value.
555
16
  if (N->getOpcode() == ISD::BITCAST)
556
0
    N = N->getOperand(0);
557
16
558
16
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
559
16
560
16
  if (!BVN)
561
8
    return false;
562
8
563
8
  APInt SplatValue, SplatUndef;
564
8
  unsigned SplatBitSize;
565
8
  bool HasAnyUndefs;
566
8
567
8
  // Endianness doesn't matter in this context because we are looking for
568
8
  // an all-ones value.
569
8
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
570
8
    return SplatValue.isAllOnesValue();
571
0
572
0
  return false;
573
0
}
574
575
// Test whether N is the bitwise inverse of OfNode.
576
24
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
577
24
  if (N->getOpcode() != ISD::XOR)
578
16
    return false;
579
8
580
8
  if (isVectorAllOnes(N->getOperand(0)))
581
0
    return N->getOperand(1) == OfNode;
582
8
583
8
  if (isVectorAllOnes(N->getOperand(1)))
584
8
    return N->getOperand(0) == OfNode;
585
0
586
0
  return false;
587
0
}
588
589
// Perform combines where ISD::OR is the root node.
590
//
591
// Performs the following transformations:
592
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
593
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
594
//   vector type.
595
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
596
                                TargetLowering::DAGCombinerInfo &DCI,
597
4.22k
                                const MipsSubtarget &Subtarget) {
598
4.22k
  if (!Subtarget.hasMSA())
599
3.24k
    return SDValue();
600
979
601
979
  EVT Ty = N->getValueType(0);
602
979
603
979
  if (!Ty.is128BitVector())
604
806
    return SDValue();
605
173
606
173
  SDValue Op0 = N->getOperand(0);
607
173
  SDValue Op1 = N->getOperand(1);
608
173
609
173
  if (Op0->getOpcode() == ISD::AND && 
Op1->getOpcode() == ISD::AND26
) {
610
26
    SDValue Op0Op0 = Op0->getOperand(0);
611
26
    SDValue Op0Op1 = Op0->getOperand(1);
612
26
    SDValue Op1Op0 = Op1->getOperand(0);
613
26
    SDValue Op1Op1 = Op1->getOperand(1);
614
26
    bool IsLittleEndian = !Subtarget.isLittle();
615
26
616
26
    SDValue IfSet, IfClr, Cond;
617
26
    bool IsConstantMask = false;
618
26
    APInt Mask, InvMask;
619
26
620
26
    // If Op0Op0 is an appropriate mask, try to find it's inverse in either
621
26
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
622
26
    // looking.
623
26
    // IfClr will be set if we find a valid match.
624
26
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
625
0
      Cond = Op0Op0;
626
0
      IfSet = Op0Op1;
627
0
628
0
      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
629
0
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
630
0
        IfClr = Op1Op1;
631
0
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
632
0
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
633
0
        IfClr = Op1Op0;
634
0
635
0
      IsConstantMask = true;
636
0
    }
637
26
638
26
    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
639
26
    // thing again using this mask.
640
26
    // IfClr will be set if we find a valid match.
641
26
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
642
22
      Cond = Op0Op1;
643
22
      IfSet = Op0Op0;
644
22
645
22
      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
646
22
          
Mask.getBitWidth() == InvMask.getBitWidth()0
&&
Mask == ~InvMask0
)
647
0
        IfClr = Op1Op1;
648
22
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
649
22
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
650
22
        IfClr = Op1Op0;
651
22
652
22
      IsConstantMask = true;
653
22
    }
654
26
655
26
    // If IfClr is not yet set, try looking for a non-constant match.
656
26
    // IfClr will be set if we find a valid match amongst the eight
657
26
    // possibilities.
658
26
    if (!IfClr.getNode()) {
659
4
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
660
0
        Cond = Op1Op0;
661
0
        IfSet = Op1Op1;
662
0
        IfClr = Op0Op1;
663
4
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
664
0
        Cond = Op1Op0;
665
0
        IfSet = Op1Op1;
666
0
        IfClr = Op0Op0;
667
4
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
668
0
        Cond = Op1Op1;
669
0
        IfSet = Op1Op0;
670
0
        IfClr = Op0Op1;
671
4
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
672
2
        Cond = Op1Op1;
673
2
        IfSet = Op1Op0;
674
2
        IfClr = Op0Op0;
675
2
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
676
0
        Cond = Op0Op0;
677
0
        IfSet = Op0Op1;
678
0
        IfClr = Op1Op1;
679
2
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
680
0
        Cond = Op0Op0;
681
0
        IfSet = Op0Op1;
682
0
        IfClr = Op1Op0;
683
2
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
684
0
        Cond = Op0Op1;
685
0
        IfSet = Op0Op0;
686
0
        IfClr = Op1Op1;
687
2
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
688
2
        Cond = Op0Op1;
689
2
        IfSet = Op0Op0;
690
2
        IfClr = Op1Op0;
691
2
      }
692
4
    }
693
26
694
26
    // At this point, IfClr will be set if we have a valid match.
695
26
    if (!IfClr.getNode())
696
0
      return SDValue();
697
26
698
26
    assert(Cond.getNode() && IfSet.getNode());
699
26
700
26
    // Fold degenerate cases.
701
26
    if (IsConstantMask) {
702
22
      if (Mask.isAllOnesValue())
703
0
        return IfSet;
704
22
      else if (Mask == 0)
705
0
        return IfClr;
706
26
    }
707
26
708
26
    // Transform the DAG into an equivalent VSELECT.
709
26
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
710
26
  }
711
147
712
147
  return SDValue();
713
147
}
714
715
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
716
                                               SelectionDAG &DAG,
717
55
                                               const MipsSubtarget &Subtarget) {
718
55
  // Estimate the number of operations the below transform will turn a
719
55
  // constant multiply into. The number is approximately equal to the minimal
720
55
  // number of powers of two that constant can be broken down to by adding
721
55
  // or subtracting them.
722
55
  //
723
55
  // If we have taken more than 12[1] / 8[2] steps to attempt the
724
55
  // optimization for a native sized value, it is more than likely that this
725
55
  // optimization will make things worse.
726
55
  //
727
55
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
728
55
  //     multiplication requires at least 4 cycles, but another cycle (or two)
729
55
  //     to retrieve the result from the HI/LO registers.
730
55
  //
731
55
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
732
55
  //     materialized in 2 instructions, multiplication requires at least 4
733
55
  //     cycles, but another cycle (or two) to retrieve the result from the
734
55
  //     HI/LO registers.
735
55
  //
736
55
  // TODO:
737
55
  // - MaxSteps needs to consider the `VT` of the constant for the current
738
55
  //   target.
739
55
  // - Consider to perform this optimization after type legalization.
740
55
  //   That allows to remove a workaround for types not supported natively.
741
55
  // - Take in account `-Os, -Oz` flags because this optimization
742
55
  //   increases code size.
743
55
  unsigned MaxSteps = Subtarget.isABI_O32() ? 
830
:
1225
;
744
55
745
55
  SmallVector<APInt, 16> WorkStack(1, C);
746
55
  unsigned Steps = 0;
747
55
  unsigned BitWidth = C.getBitWidth();
748
55
749
475
  while (!WorkStack.empty()) {
750
452
    APInt Val = WorkStack.pop_back_val();
751
452
752
452
    if (Val == 0 || Val == 1)
753
20
      continue;
754
432
755
432
    if (Steps >= MaxSteps)
756
32
      return false;
757
400
758
400
    if (Val.isPowerOf2()) {
759
49
      ++Steps;
760
49
      continue;
761
49
    }
762
351
763
351
    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
764
351
    APInt Ceil = Val.isNegative() ? 
APInt(BitWidth, 0)6
765
351
                                  : 
APInt(BitWidth, 1) << C.ceilLogBase2()345
;
766
351
    if ((Val - Floor).ule(Ceil - Val)) {
767
347
      WorkStack.push_back(Floor);
768
347
      WorkStack.push_back(Val - Floor);
769
347
    } else {
770
4
      WorkStack.push_back(Ceil);
771
4
      WorkStack.push_back(Ceil - Val);
772
4
    }
773
351
774
351
    ++Steps;
775
351
  }
776
55
777
55
  // If the value being multiplied is not supported natively, we have to pay
778
55
  // an additional legalization cost, conservatively assume an increase in the
779
55
  // cost of 3 instructions per step. This values for this heuristic were
780
55
  // determined experimentally.
781
55
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
782
23
                              .getRegisterType(*DAG.getContext(), VT)
783
23
                              .getSizeInBits();
784
23
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
785
23
  if (Steps > 27)
786
0
    return false;
787
23
788
23
  return true;
789
23
}
790
791
// Recursively build a DAG computing X * C using only shifts, adds and subs.
// Each recursion step peels off the nearest power of two (floor or ceiling,
// whichever leaves the smaller remainder) until only 0/1/powers of two remain.
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  // A negative constant has no "next power of two up"; use 0 as the ceiling
  // so the subtraction path handles it.
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
826
827
// Try to replace a scalar multiply by a constant with an equivalent
// shift/add/sub sequence when the cost heuristic says it is worthwhile.
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (CN && !VT.isVector()) {
    const APInt &MulAmt = CN->getAPIntValue();
    if (shouldTransformMulToShiftsAddsSubs(MulAmt, VT, DAG, Subtarget)) {
      EVT ShiftTy = TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT);
      return genConstMult(N->getOperand(0), MulAmt, SDLoc(N), VT, ShiftTy,
                          DAG);
    }
  }

  // Returning the node itself (rather than an empty SDValue) signals that
  // no replacement node was created.
  return SDValue(N, 0);
}
842
843
// Fold a vector shift whose amount is a constant splat into the DSP shift
// node Opc, carrying the amount as an i32 immediate. Returns an empty
// SDValue unless the target has DSP and the splat is element-sized and
// in range for the element width.
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  // Bail out unless the shift amount is a constant splat, the splat element
  // is exactly EltSize bits, and the amount is encodable (shift amounts of
  // EltSize or more have no DSP encoding).
  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
867
868
// Rewrite a shift-left of a DSP vector type into the SHLL_DSP node when the
// amount is a constant splat (delegated to performDSPShiftCombine).
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  // Only the two DSP vector types are of interest here.
  if (Ty == MVT::v2i16 || Ty == MVT::v4i8)
    return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);

  return SDValue();
}
878
879
// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
880
// constant splats into MipsISD::SHRA_DSP for DSPr2.
881
//
882
// Performs the following transformations:
883
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
884
//   sign/zero-extension is completely overwritten by the new one performed by
885
//   the ISD::SRA and ISD::SHL nodes.
886
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
887
//   sequence.
888
//
889
// See performDSPShiftCombine for more information about the transformation
890
// used for DSPr2.
891
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      // The shared shift amount must be a constant to reason about widths.
      if (!ShAmount)
        return SDValue();

      // The shifted value must come from an MSA element extraction.
      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      // TotalBits is how many low bits of the i32 result survive the
      // shl+sra round trip.
      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  // DSP path: v2i16 always; v4i8 only with DSPr2's arithmetic byte shift.
  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
935
936
937
// Rewrite a logical shift-right of a DSP vector type into the SHRL_DSP node
// when the amount is a constant splat (delegated to performDSPShiftCombine).
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  // v4i8 is always eligible; v2i16 requires DSPr2's logical halfword shift.
  bool Eligible =
      (Ty == MVT::v4i8) || (Ty == MVT::v2i16 && Subtarget.hasDSPR2());
  if (!Eligible)
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}
947
948
42
// Report whether the DSP ASE has a compare instruction for this condition
// code and vector type: equality works for both DSP vector types, signed
// ordering only for v2i16, unsigned ordering only for v4i8.
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  const bool IsV2I16 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:
    return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:
    return IsV2I16;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    return !IsV2I16;
  default:
    return false;
  }
}
965
966
197
// Rewrite a setcc on a DSP vector type into SETCC_DSP when the condition
// code has a corresponding DSP compare instruction.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  bool IsDSPVecTy = (Ty == MVT::v2i16) || (Ty == MVT::v4i8);
  if (!IsDSPVecTy)
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (!isLegalDSPCondCode(Ty, CC))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}
978
979
346
// Merge (vselect (SETCC_DSP ...), t, f) on a DSP vector type into a single
// SELECT_CC_DSP node.
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  // Only the DSP vector types are handled here.
  if (Ty != MVT::v2i16 && Ty != MVT::v4i8)
    return SDValue();

  // The condition must already have been turned into a DSP comparison.
  SDValue Cmp = N->getOperand(0);
  if (Cmp.getOpcode() != MipsISD::SETCC_DSP)
    return SDValue();

  return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, Cmp.getOperand(0),
                     Cmp.getOperand(1), N->getOperand(1), N->getOperand(2),
                     Cmp.getOperand(2));
}
995
996
// Recognize a vector NOT of an OR and replace it with the MSA VNOR node.
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  // Only 128-bit integer vectors on MSA targets can use vnor.v.
  if (!Subtarget.hasMSA() || !Ty.is128BitVector() || !Ty.isInteger())
    return SDValue();

  // Match either operand order of:
  //   (xor (or $a, $b), (build_vector allones))
  //   (xor (or $a, $b), (bitcast (build_vector allones)))
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Inverted;

  if (ISD::isBuildVectorAllOnes(LHS.getNode()))
    Inverted = RHS;
  else if (ISD::isBuildVectorAllOnes(RHS.getNode()))
    Inverted = LHS;
  else
    return SDValue();

  if (Inverted->getOpcode() != ISD::OR)
    return SDValue();

  return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, Inverted->getOperand(0),
                     Inverted->getOperand(1));
}
1022
1023
// Dispatch target-specific DAG combines for the MipsSE subtarget. Opcodes
// that return directly bypass the generic Mips combine; opcodes that set
// Val and break fall through to it when their combine produced nothing.
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  // Nothing matched here; let the generic Mips combines have a go.
  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
1063
1064
// Expand MipsSE pseudo-instructions that need custom MachineBasicBlock
// surgery (MSA branches, FP subregister copies/inserts, f16 load/store,
// FP round/extend). Anything unrecognized is forwarded to the base class.
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  // MSA "set if (not) zero" pseudos lower to a branch on the corresponding
  // BNZ/BZ instruction for each element width (and .V for the whole vector).
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  // Variable-index inserts: the second argument is the element size in
  // bytes, the third whether the element is floating point.
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}
1140
1141
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1142
    const CCState &CCInfo, unsigned NextStackOffset,
1143
554
    const MipsFunctionInfo &FI) const {
1144
554
  if (!UseMipsTailCalls)
1145
235
    return false;
1146
319
1147
319
  // Exception has to be cleared with eret.
1148
319
  if (FI.isISR())
1149
0
    return false;
1150
319
1151
319
  // Return false if either the callee or caller has a byval argument.
1152
319
  if (CCInfo.getInRegsParamsCount() > 0 || 
FI.hasByvalArg()295
)
1153
36
    return false;
1154
283
1155
283
  // Return true if the callee's argument area is no larger than the
1156
283
  // caller's.
1157
283
  return NextStackOffset <= FI.getIncomingArgSize();
1158
283
}
1159
1160
// Build the operand list for a call node. Unlike the O32 base lowering,
// MipsSE places the callee itself first, then defers to the base class for
// the remaining operands (register copies, glue, etc.).
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
1171
1172
17.7k
// Lower an f64 load when double-precision load/store is disabled
// (-mno-ldc1-sdc1): split it into two i32 loads plus a BuildPairF64.
// Everything else goes through the base lowering.
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlignment(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address. Chained after Lo; alignment is capped at
  // 4 because the +4 offset may break the original alignment.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());

  // On big-endian targets the low word of the double is at the higher
  // address.
  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}
1200
1201
7.67k
// Lower an f64 store when double-precision load/store is disabled
// (-mno-ldc1-sdc1): extract both 32-bit halves and store them as two i32s.
// Everything else goes through the base lowering.
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  // On big-endian targets the low word of the double is at the higher
  // address.
  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain =
      DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
                   Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address. Alignment is capped at 4 because the +4
  // offset may break the original alignment.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      std::min(Nd.getAlignment(), 4U),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
1230
1231
// Lower i64<->f64 bitcasts by moving the two 32-bit halves through the
// FPR-pair pseudo nodes; other bitcasts use the default expansion.
SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT SrcTy = Op.getOperand(0).getValueType().getSimpleVT();
  MVT DstTy = Op.getValueType().getSimpleVT();

  // i64 -> f64: split the integer into two i32 halves and pair them up.
  if (SrcTy == MVT::i64 && DstTy == MVT::f64) {
    SDValue In = Op.getOperand(0);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getIntPtrConstant(0, DL));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getIntPtrConstant(1, DL));
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // f64 -> i64: pull out both 32-bit halves of the pair and rebuild an i64.
  if (SrcTy == MVT::f64 && DstTy == MVT::i64) {
    SDValue In = Op.getOperand(0);
    SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, In,
                             DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, In,
                             DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}
1260
1261
// Lower a multiply/divide-like node into an accumulator-based NewOpc plus
// MFLO/MFHI reads of whichever halves the caller requested.
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  // The accumulator result has no register class; model it as Untyped.
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  // Single-result case: return whichever half was requested.
  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
1284
1285
43
// Split a 64-bit value into its two 32-bit halves and move them into the
// HI/LO accumulator pair via MTLOHI.
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                           DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                           DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, Lo, Hi);
}
1292
1293
33
// Read LO and HI out of an accumulator-typed node and reassemble them into
// a single i64 value.
static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue LoPart = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue HiPart = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoPart, HiPart);
}
1298
1299
// This function expands mips intrinsic nodes which have 64-bit input operands
1300
// or output values.
1301
//
1302
// out64 = intrinsic-node in64
1303
// =>
1304
// lo = copy (extract-element (in64, 0))
1305
// hi = copy (extract-element (in64, 1))
1306
// mips-specific-node
1307
// v0 = copy lo
1308
// v1 = copy hi
1309
// out64 = merge-values (v0, v1)
1310
//
1311
45
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64. An i64 input becomes the
  // accumulator value; everything else is passed through unchanged.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output. i64 results are produced in the accumulator, so the node
  // itself yields an Untyped value that is read back below.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  // Re-attach the chain result alongside the (possibly rebuilt) value.
  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
1358
1359
// Lower an MSA copy intrinsic into the specified SelectionDAG node
1360
58
// Lower an MSA element-copy intrinsic into the extraction node Opc, tagging
// the node with the source lane's element type so the extension width is
// known.
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue SrcVec = Op->getOperand(1);
  SDValue LaneIdx = Op->getOperand(2);
  EVT LaneTy = SrcVec->getValueType(0).getVectorElementType();

  return DAG.getNode(Opc, DL, ResTy, SrcVec, LaneIdx,
                     DAG.getValueType(LaneTy));
}
1372
1373
12
// Build a splat of operand OpNr of Op, zero-extended into every lane of the
// result vector type. v2i64 results are constructed via v4i32 so the
// BUILD_VECTOR can be pattern-matched to splat.d/splati.d.
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if(BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    // Mask each 32-bit lane down to its low bit before bitcasting back, so
    // the value seen in the v2i64 result is zero-extended.
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}
1416
1417
// Build a splat constant of Op's result type from the immediate at operand
// index ImmOp, sign- or zero-extending it to the element width.
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  EVT ResTy = Op->getValueType(0);
  unsigned EltBits = ResTy.getScalarType().getSizeInBits();
  uint64_t ImmVal =
      IsSigned ? static_cast<uint64_t>(CImm->getSExtValue())
               : CImm->getZExtValue();
  return DAG.getConstant(APInt(EltBits, ImmVal, IsSigned), SDLoc(Op), ResTy);
}
1425
1426
// Build a BUILD_VECTOR splatting SplatValue into every lane of VecTy.
// v2i64 is built through v4i32 (two i32 halves per lane) and bitcast back,
// honouring the target's endianness.
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
1461
1462
// Lower an MSA bit-immediate intrinsic (e.g. bseti/bnegi) into a generic
// binary node: Opc(Operand1, splat(1 << Imm)).
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  SDLoc DL(Op);
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;

  // The DAG combiner can't constant fold bitcasted vectors yet, so when Imm
  // is a constant and the type is v2i64 we fold (1 << Imm) by hand here.
  if (VecTy == MVT::v2i64) {
    if (auto *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue HiHalf =
          DAG.getConstant(BitImm.lshr(32).trunc(32), DL, MVT::i32);
      SDValue LoHalf = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(LoHalf, HiHalf);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {LoHalf, HiHalf, LoHalf, HiHalf}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, so materialize (1 << splat(Imm)) with a
    // vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter
    // since only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
1505
1506
120
// Mask each element of the shift-amount vector (operand 2) into the legal
// range [0, EltBits-1] by ANDing it with a splat of EltBits - 1.
static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  MVT EltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  bool IsBigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  // EltBits is a power of two, so EltBits - 1 is the wrap-around mask.
  SDValue Mask =
      getBuildVectorSplat(ResTy,
                          DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                          DL, EltTy),
                          IsBigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
}
1518
1519
20
// Lower the MSA bclr intrinsic: per element, clear the bit selected by
// operand 2, i.e. Operand1 & ~(1 << (Operand2 mod EltBits)).
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy,
                            DAG.getConstant(1, DL, ResTy),
                            truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}
1528
1529
20
// Lower the MSA bclri intrinsic (bit clear with an immediate bit index):
// constant fold the mask ~(1 << index) and AND it with operand 1.
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt OneBit = APInt(ResTy.getScalarSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getConstant(~OneBit, DL, ResTy));
}
1538
1539
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1540
2.87k
                                                      SelectionDAG &DAG) const {
1541
2.87k
  SDLoc DL(Op);
1542
2.87k
  unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
1543
2.87k
  switch (Intrinsic) {
1544
2.87k
  default:
1545
840
    return SDValue();
1546
2.87k
  case Intrinsic::mips_shilo:
1547
2
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1548
2.87k
  case Intrinsic::mips_dpau_h_qbl:
1549
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1550
2.87k
  case Intrinsic::mips_dpau_h_qbr:
1551
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1552
2.87k
  case Intrinsic::mips_dpsu_h_qbl:
1553
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1554
2.87k
  case Intrinsic::mips_dpsu_h_qbr:
1555
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1556
2.87k
  case Intrinsic::mips_dpa_w_ph:
1557
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1558
2.87k
  case Intrinsic::mips_dps_w_ph:
1559
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1560
2.87k
  case Intrinsic::mips_dpax_w_ph:
1561
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1562
2.87k
  case Intrinsic::mips_dpsx_w_ph:
1563
1
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1564
2.87k
  case Intrinsic::mips_mulsa_w_ph:
1565
1
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1566
2.87k
  case Intrinsic::mips_mult:
1567
1
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1568
2.87k
  case Intrinsic::mips_multu:
1569
1
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1570
2.87k
  case Intrinsic::mips_madd:
1571
1
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1572
2.87k
  case Intrinsic::mips_maddu:
1573
3
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1574
2.87k
  case Intrinsic::mips_msub:
1575
1
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1576
2.87k
  case Intrinsic::mips_msubu:
1577
1
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1578
2.87k
  case Intrinsic::mips_addv_b:
1579
648
  case Intrinsic::mips_addv_h:
1580
648
  case Intrinsic::mips_addv_w:
1581
648
  case Intrinsic::mips_addv_d:
1582
648
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1583
648
                       Op->getOperand(2));
1584
648
  case Intrinsic::mips_addvi_b:
1585
20
  case Intrinsic::mips_addvi_h:
1586
20
  case Intrinsic::mips_addvi_w:
1587
20
  case Intrinsic::mips_addvi_d:
1588
20
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1589
20
                       lowerMSASplatImm(Op, 2, DAG));
1590
20
  case Intrinsic::mips_and_v:
1591
8
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1592
8
                       Op->getOperand(2));
1593
20
  case Intrinsic::mips_andi_b:
1594
5
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1595
5
                       lowerMSASplatImm(Op, 2, DAG));
1596
20
  case Intrinsic::mips_bclr_b:
1597
20
  case Intrinsic::mips_bclr_h:
1598
20
  case Intrinsic::mips_bclr_w:
1599
20
  case Intrinsic::mips_bclr_d:
1600
20
    return lowerMSABitClear(Op, DAG);
1601
20
  case Intrinsic::mips_bclri_b:
1602
20
  case Intrinsic::mips_bclri_h:
1603
20
  case Intrinsic::mips_bclri_w:
1604
20
  case Intrinsic::mips_bclri_d:
1605
20
    return lowerMSABitClearImm(Op, DAG);
1606
21
  case Intrinsic::mips_binsli_b:
1607
21
  case Intrinsic::mips_binsli_h:
1608
21
  case Intrinsic::mips_binsli_w:
1609
21
  case Intrinsic::mips_binsli_d: {
1610
21
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1611
21
    EVT VecTy = Op->getValueType(0);
1612
21
    EVT EltTy = VecTy.getVectorElementType();
1613
21
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1614
1
      report_fatal_error("Immediate out of range");
1615
20
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
1616
20
                                       Op->getConstantOperandVal(3) + 1);
1617
20
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
1618
20
                       DAG.getConstant(Mask, DL, VecTy, true),
1619
20
                       Op->getOperand(2), Op->getOperand(1));
1620
20
  }
1621
20
  case Intrinsic::mips_binsri_b:
1622
20
  case Intrinsic::mips_binsri_h:
1623
20
  case Intrinsic::mips_binsri_w:
1624
20
  case Intrinsic::mips_binsri_d: {
1625
20
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1626
20
    EVT VecTy = Op->getValueType(0);
1627
20
    EVT EltTy = VecTy.getVectorElementType();
1628
20
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1629
0
      report_fatal_error("Immediate out of range");
1630
20
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
1631
20
                                      Op->getConstantOperandVal(3) + 1);
1632
20
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
1633
20
                       DAG.getConstant(Mask, DL, VecTy, true),
1634
20
                       Op->getOperand(2), Op->getOperand(1));
1635
20
  }
1636
20
  case Intrinsic::mips_bmnz_v:
1637
8
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1638
8
                       Op->getOperand(2), Op->getOperand(1));
1639
20
  case Intrinsic::mips_bmnzi_b:
1640
8
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1641
8
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1642
8
                       Op->getOperand(1));
1643
20
  case Intrinsic::mips_bmz_v:
1644
8
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1645
8
                       Op->getOperand(1), Op->getOperand(2));
1646
20
  case Intrinsic::mips_bmzi_b:
1647
8
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1648
8
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1649
8
                       Op->getOperand(2));
1650
20
  case Intrinsic::mips_bneg_b:
1651
20
  case Intrinsic::mips_bneg_h:
1652
20
  case Intrinsic::mips_bneg_w:
1653
20
  case Intrinsic::mips_bneg_d: {
1654
20
    EVT VecTy = Op->getValueType(0);
1655
20
    SDValue One = DAG.getConstant(1, DL, VecTy);
1656
20
1657
20
    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1658
20
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
1659
20
                                   truncateVecElts(Op, DAG)));
1660
20
  }
1661
20
  case Intrinsic::mips_bnegi_b:
1662
20
  case Intrinsic::mips_bnegi_h:
1663
20
  case Intrinsic::mips_bnegi_w:
1664
20
  case Intrinsic::mips_bnegi_d:
1665
20
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1666
20
                                    !Subtarget.isLittle());
1667
20
  case Intrinsic::mips_bnz_b:
1668
8
  case Intrinsic::mips_bnz_h:
1669
8
  case Intrinsic::mips_bnz_w:
1670
8
  case Intrinsic::mips_bnz_d:
1671
8
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1672
8
                       Op->getOperand(1));
1673
8
  case Intrinsic::mips_bnz_v:
1674
2
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1675
2
                       Op->getOperand(1));
1676
8
  case Intrinsic::mips_bsel_v:
1677
8
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1678
8
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1679
8
                       Op->getOperand(1), Op->getOperand(3),
1680
8
                       Op->getOperand(2));
1681
8
  case Intrinsic::mips_bseli_b:
1682
5
    // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1683
5
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1684
5
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1685
5
                       Op->getOperand(2));
1686
20
  case Intrinsic::mips_bset_b:
1687
20
  case Intrinsic::mips_bset_h:
1688
20
  case Intrinsic::mips_bset_w:
1689
20
  case Intrinsic::mips_bset_d: {
1690
20
    EVT VecTy = Op->getValueType(0);
1691
20
    SDValue One = DAG.getConstant(1, DL, VecTy);
1692
20
1693
20
    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1694
20
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
1695
20
                                   truncateVecElts(Op, DAG)));
1696
20
  }
1697
20
  case Intrinsic::mips_bseti_b:
1698
20
  case Intrinsic::mips_bseti_h:
1699
20
  case Intrinsic::mips_bseti_w:
1700
20
  case Intrinsic::mips_bseti_d:
1701
20
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1702
20
                                    !Subtarget.isLittle());
1703
20
  case Intrinsic::mips_bz_b:
1704
0
  case Intrinsic::mips_bz_h:
1705
0
  case Intrinsic::mips_bz_w:
1706
0
  case Intrinsic::mips_bz_d:
1707
0
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1708
0
                       Op->getOperand(1));
1709
2
  case Intrinsic::mips_bz_v:
1710
2
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1711
2
                       Op->getOperand(1));
1712
8
  case Intrinsic::mips_ceq_b:
1713
8
  case Intrinsic::mips_ceq_h:
1714
8
  case Intrinsic::mips_ceq_w:
1715
8
  case Intrinsic::mips_ceq_d:
1716
8
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1717
8
                        Op->getOperand(2), ISD::SETEQ);
1718
16
  case Intrinsic::mips_ceqi_b:
1719
16
  case Intrinsic::mips_ceqi_h:
1720
16
  case Intrinsic::mips_ceqi_w:
1721
16
  case Intrinsic::mips_ceqi_d:
1722
16
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1723
16
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
1724
16
  case Intrinsic::mips_cle_s_b:
1725
8
  case Intrinsic::mips_cle_s_h:
1726
8
  case Intrinsic::mips_cle_s_w:
1727
8
  case Intrinsic::mips_cle_s_d:
1728
8
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1729
8
                        Op->getOperand(2), ISD::SETLE);
1730
28
  case Intrinsic::mips_clei_s_b:
1731
28
  case Intrinsic::mips_clei_s_h:
1732
28
  case Intrinsic::mips_clei_s_w:
1733
28
  case Intrinsic::mips_clei_s_d:
1734
28
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1735
28
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
1736
28
  case Intrinsic::mips_cle_u_b:
1737
8
  case Intrinsic::mips_cle_u_h:
1738
8
  case Intrinsic::mips_cle_u_w:
1739
8
  case Intrinsic::mips_cle_u_d:
1740
8
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1741
8
                        Op->getOperand(2), ISD::SETULE);
1742
20
  case Intrinsic::mips_clei_u_b:
1743
20
  case Intrinsic::mips_clei_u_h:
1744
20
  case Intrinsic::mips_clei_u_w:
1745
20
  case Intrinsic::mips_clei_u_d:
1746
20
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1747
20
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1748
20
  case Intrinsic::mips_clt_s_b:
1749
8
  case Intrinsic::mips_clt_s_h:
1750
8
  case Intrinsic::mips_clt_s_w:
1751
8
  case Intrinsic::mips_clt_s_d:
1752
8
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1753
8
                        Op->getOperand(2), ISD::SETLT);
1754
28
  case Intrinsic::mips_clti_s_b:
1755
28
  case Intrinsic::mips_clti_s_h:
1756
28
  case Intrinsic::mips_clti_s_w:
1757
28
  case Intrinsic::mips_clti_s_d:
1758
28
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1759
28
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
1760
28
  case Intrinsic::mips_clt_u_b:
1761
8
  case Intrinsic::mips_clt_u_h:
1762
8
  case Intrinsic::mips_clt_u_w:
1763
8
  case Intrinsic::mips_clt_u_d:
1764
8
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1765
8
                        Op->getOperand(2), ISD::SETULT);
1766
20
  case Intrinsic::mips_clti_u_b:
1767
20
  case Intrinsic::mips_clti_u_h:
1768
20
  case Intrinsic::mips_clti_u_w:
1769
20
  case Intrinsic::mips_clti_u_d:
1770
20
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1771
20
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1772
29
  case Intrinsic::mips_copy_s_b:
1773
29
  case Intrinsic::mips_copy_s_h:
1774
29
  case Intrinsic::mips_copy_s_w:
1775
29
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1776
29
  case Intrinsic::mips_copy_s_d:
1777
7
    if (Subtarget.hasMips64())
1778
4
      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1779
4
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1780
3
    else {
1781
3
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1782
3
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1783
3
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1784
3
                         Op->getValueType(0), Op->getOperand(1),
1785
3
                         Op->getOperand(2));
1786
3
    }
1787
21
  case Intrinsic::mips_copy_u_b:
1788
21
  case Intrinsic::mips_copy_u_h:
1789
21
  case Intrinsic::mips_copy_u_w:
1790
21
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1791
21
  case Intrinsic::mips_copy_u_d:
1792
7
    if (Subtarget.hasMips64())
1793
4
      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1794
4
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1795
3
    else {
1796
3
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1797
3
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1798
3
      // Note: When i64 is illegal, this results in copy_s.w instructions
1799
3
      // instead of copy_u.w instructions. This makes no difference to the
1800
3
      // behaviour since i64 is only illegal when the register file is 32-bit.
1801
3
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1802
3
                         Op->getValueType(0), Op->getOperand(1),
1803
3
                         Op->getOperand(2));
1804
3
    }
1805
8
  case Intrinsic::mips_div_s_b:
1806
8
  case Intrinsic::mips_div_s_h:
1807
8
  case Intrinsic::mips_div_s_w:
1808
8
  case Intrinsic::mips_div_s_d:
1809
8
    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1810
8
                       Op->getOperand(2));
1811
8
  case Intrinsic::mips_div_u_b:
1812
8
  case Intrinsic::mips_div_u_h:
1813
8
  case Intrinsic::mips_div_u_w:
1814
8
  case Intrinsic::mips_div_u_d:
1815
8
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1816
8
                       Op->getOperand(2));
1817
63
  case Intrinsic::mips_fadd_w:
1818
63
  case Intrinsic::mips_fadd_d:
1819
63
    // TODO: If intrinsics have fast-math-flags, propagate them.
1820
63
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1821
63
                       Op->getOperand(2));
1822
63
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1823
63
  case Intrinsic::mips_fceq_w:
1824
4
  case Intrinsic::mips_fceq_d:
1825
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1826
4
                        Op->getOperand(2), ISD::SETOEQ);
1827
4
  case Intrinsic::mips_fcle_w:
1828
4
  case Intrinsic::mips_fcle_d:
1829
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1830
4
                        Op->getOperand(2), ISD::SETOLE);
1831
4
  case Intrinsic::mips_fclt_w:
1832
4
  case Intrinsic::mips_fclt_d:
1833
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1834
4
                        Op->getOperand(2), ISD::SETOLT);
1835
4
  case Intrinsic::mips_fcne_w:
1836
4
  case Intrinsic::mips_fcne_d:
1837
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1838
4
                        Op->getOperand(2), ISD::SETONE);
1839
4
  case Intrinsic::mips_fcor_w:
1840
4
  case Intrinsic::mips_fcor_d:
1841
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1842
4
                        Op->getOperand(2), ISD::SETO);
1843
4
  case Intrinsic::mips_fcueq_w:
1844
4
  case Intrinsic::mips_fcueq_d:
1845
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1846
4
                        Op->getOperand(2), ISD::SETUEQ);
1847
4
  case Intrinsic::mips_fcule_w:
1848
4
  case Intrinsic::mips_fcule_d:
1849
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1850
4
                        Op->getOperand(2), ISD::SETULE);
1851
4
  case Intrinsic::mips_fcult_w:
1852
4
  case Intrinsic::mips_fcult_d:
1853
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1854
4
                        Op->getOperand(2), ISD::SETULT);
1855
4
  case Intrinsic::mips_fcun_w:
1856
4
  case Intrinsic::mips_fcun_d:
1857
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1858
4
                        Op->getOperand(2), ISD::SETUO);
1859
4
  case Intrinsic::mips_fcune_w:
1860
4
  case Intrinsic::mips_fcune_d:
1861
4
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1862
4
                        Op->getOperand(2), ISD::SETUNE);
1863
4
  case Intrinsic::mips_fdiv_w:
1864
4
  case Intrinsic::mips_fdiv_d:
1865
4
    // TODO: If intrinsics have fast-math-flags, propagate them.
1866
4
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1867
4
                       Op->getOperand(2));
1868
4
  case Intrinsic::mips_ffint_u_w:
1869
4
  case Intrinsic::mips_ffint_u_d:
1870
4
    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1871
4
                       Op->getOperand(1));
1872
4
  case Intrinsic::mips_ffint_s_w:
1873
4
  case Intrinsic::mips_ffint_s_d:
1874
4
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1875
4
                       Op->getOperand(1));
1876
16
  case Intrinsic::mips_fill_b:
1877
16
  case Intrinsic::mips_fill_h:
1878
16
  case Intrinsic::mips_fill_w:
1879
16
  case Intrinsic::mips_fill_d: {
1880
16
    EVT ResTy = Op->getValueType(0);
1881
16
    SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
1882
16
                                 Op->getOperand(1));
1883
16
1884
16
    // If ResTy is v2i64 then the type legalizer will break this node down into
1885
16
    // an equivalent v4i32.
1886
16
    return DAG.getBuildVector(ResTy, DL, Ops);
1887
16
  }
1888
16
  case Intrinsic::mips_fexp2_w:
1889
4
  case Intrinsic::mips_fexp2_d: {
1890
4
    // TODO: If intrinsics have fast-math-flags, propagate them.
1891
4
    EVT ResTy = Op->getValueType(0);
1892
4
    return DAG.getNode(
1893
4
        ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1894
4
        DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1895
4
  }
1896
4
  case Intrinsic::mips_flog2_w:
1897
4
  case Intrinsic::mips_flog2_d:
1898
4
    return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1899
4
  case Intrinsic::mips_fmadd_w:
1900
4
  case Intrinsic::mips_fmadd_d:
1901
4
    return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1902
4
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1903
10
  case Intrinsic::mips_fmul_w:
1904
10
  case Intrinsic::mips_fmul_d:
1905
10
    // TODO: If intrinsics have fast-math-flags, propagate them.
1906
10
    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1907
10
                       Op->getOperand(2));
1908
10
  case Intrinsic::mips_fmsub_w:
1909
4
  case Intrinsic::mips_fmsub_d: {
1910
4
    // TODO: If intrinsics have fast-math-flags, propagate them.
1911
4
    return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
1912
4
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1913
4
  }
1914
4
  case Intrinsic::mips_frint_w:
1915
4
  case Intrinsic::mips_frint_d:
1916
4
    return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1917
4
  case Intrinsic::mips_fsqrt_w:
1918
4
  case Intrinsic::mips_fsqrt_d:
1919
4
    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1920
7
  case Intrinsic::mips_fsub_w:
1921
7
  case Intrinsic::mips_fsub_d:
1922
7
    // TODO: If intrinsics have fast-math-flags, propagate them.
1923
7
    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1924
7
                       Op->getOperand(2));
1925
7
  case Intrinsic::mips_ftrunc_u_w:
1926
4
  case Intrinsic::mips_ftrunc_u_d:
1927
4
    return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1928
4
                       Op->getOperand(1));
1929
4
  case Intrinsic::mips_ftrunc_s_w:
1930
4
  case Intrinsic::mips_ftrunc_s_d:
1931
4
    return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1932
4
                       Op->getOperand(1));
1933
8
  case Intrinsic::mips_ilvev_b:
1934
8
  case Intrinsic::mips_ilvev_h:
1935
8
  case Intrinsic::mips_ilvev_w:
1936
8
  case Intrinsic::mips_ilvev_d:
1937
8
    return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1938
8
                       Op->getOperand(1), Op->getOperand(2));
1939
8
  case Intrinsic::mips_ilvl_b:
1940
8
  case Intrinsic::mips_ilvl_h:
1941
8
  case Intrinsic::mips_ilvl_w:
1942
8
  case Intrinsic::mips_ilvl_d:
1943
8
    return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1944
8
                       Op->getOperand(1), Op->getOperand(2));
1945
8
  case Intrinsic::mips_ilvod_b:
1946
8
  case Intrinsic::mips_ilvod_h:
1947
8
  case Intrinsic::mips_ilvod_w:
1948
8
  case Intrinsic::mips_ilvod_d:
1949
8
    return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
1950
8
                       Op->getOperand(1), Op->getOperand(2));
1951
8
  case Intrinsic::mips_ilvr_b:
1952
8
  case Intrinsic::mips_ilvr_h:
1953
8
  case Intrinsic::mips_ilvr_w:
1954
8
  case Intrinsic::mips_ilvr_d:
1955
8
    return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
1956
8
                       Op->getOperand(1), Op->getOperand(2));
1957
16
  case Intrinsic::mips_insert_b:
1958
16
  case Intrinsic::mips_insert_h:
1959
16
  case Intrinsic::mips_insert_w:
1960
16
  case Intrinsic::mips_insert_d:
1961
16
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
1962
16
                       Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
1963
16
  case Intrinsic::mips_insve_b:
1964
16
  case Intrinsic::mips_insve_h:
1965
16
  case Intrinsic::mips_insve_w:
1966
16
  case Intrinsic::mips_insve_d: {
1967
16
    // Report an error for out of range values.
1968
16
    int64_t Max;
1969
16
    switch (Intrinsic) {
1970
16
    
case Intrinsic::mips_insve_b: Max = 15; break4
;
1971
16
    
case Intrinsic::mips_insve_h: Max = 7; break4
;
1972
16
    
case Intrinsic::mips_insve_w: Max = 3; break4
;
1973
16
    
case Intrinsic::mips_insve_d: Max = 1; break4
;
1974
16
    
default: 0
llvm_unreachable0
("Unmatched intrinsic");
1975
16
    }
1976
16
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
1977
16
    if (Value < 0 || Value > Max)
1978
0
      report_fatal_error("Immediate out of range");
1979
16
    return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
1980
16
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
1981
16
                       DAG.getConstant(0, DL, MVT::i32));
1982
16
    }
1983
28
  case Intrinsic::mips_ldi_b:
1984
28
  case Intrinsic::mips_ldi_h:
1985
28
  case Intrinsic::mips_ldi_w:
1986
28
  case Intrinsic::mips_ldi_d:
1987
28
    return lowerMSASplatImm(Op, 1, DAG, true);
1988
28
  case Intrinsic::mips_lsa:
1989
8
  case Intrinsic::mips_dlsa: {
1990
8
    EVT ResTy = Op->getValueType(0);
1991
8
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1992
8
                       DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
1993
8
                                   Op->getOperand(2), Op->getOperand(3)));
1994
8
  }
1995
8
  case Intrinsic::mips_maddv_b:
1996
8
  case Intrinsic::mips_maddv_h:
1997
8
  case Intrinsic::mips_maddv_w:
1998
8
  case Intrinsic::mips_maddv_d: {
1999
8
    EVT ResTy = Op->getValueType(0);
2000
8
    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2001
8
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2002
8
                                   Op->getOperand(2), Op->getOperand(3)));
2003
8
  }
2004
8
  case Intrinsic::mips_max_s_b:
2005
8
  case Intrinsic::mips_max_s_h:
2006
8
  case Intrinsic::mips_max_s_w:
2007
8
  case Intrinsic::mips_max_s_d:
2008
8
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2009
8
                       Op->getOperand(1), Op->getOperand(2));
2010
8
  case Intrinsic::mips_max_u_b:
2011
8
  case Intrinsic::mips_max_u_h:
2012
8
  case Intrinsic::mips_max_u_w:
2013
8
  case Intrinsic::mips_max_u_d:
2014
8
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2015
8
                       Op->getOperand(1), Op->getOperand(2));
2016
28
  case Intrinsic::mips_maxi_s_b:
2017
28
  case Intrinsic::mips_maxi_s_h:
2018
28
  case Intrinsic::mips_maxi_s_w:
2019
28
  case Intrinsic::mips_maxi_s_d:
2020
28
    return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2021
28
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2022
28
  case Intrinsic::mips_maxi_u_b:
2023
20
  case Intrinsic::mips_maxi_u_h:
2024
20
  case Intrinsic::mips_maxi_u_w:
2025
20
  case Intrinsic::mips_maxi_u_d:
2026
20
    return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2027
20
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2028
20
  case Intrinsic::mips_min_s_b:
2029
8
  case Intrinsic::mips_min_s_h:
2030
8
  case Intrinsic::mips_min_s_w:
2031
8
  case Intrinsic::mips_min_s_d:
2032
8
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2033
8
                       Op->getOperand(1), Op->getOperand(2));
2034
8
  case Intrinsic::mips_min_u_b:
2035
8
  case Intrinsic::mips_min_u_h:
2036
8
  case Intrinsic::mips_min_u_w:
2037
8
  case Intrinsic::mips_min_u_d:
2038
8
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2039
8
                       Op->getOperand(1), Op->getOperand(2));
2040
28
  case Intrinsic::mips_mini_s_b:
2041
28
  case Intrinsic::mips_mini_s_h:
2042
28
  case Intrinsic::mips_mini_s_w:
2043
28
  case Intrinsic::mips_mini_s_d:
2044
28
    return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2045
28
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2046
28
  case Intrinsic::mips_mini_u_b:
2047
20
  case Intrinsic::mips_mini_u_h:
2048
20
  case Intrinsic::mips_mini_u_w:
2049
20
  case Intrinsic::mips_mini_u_d:
2050
20
    return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2051
20
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2052
20
  case Intrinsic::mips_mod_s_b:
2053
8
  case Intrinsic::mips_mod_s_h:
2054
8
  case Intrinsic::mips_mod_s_w:
2055
8
  case Intrinsic::mips_mod_s_d:
2056
8
    return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2057
8
                       Op->getOperand(2));
2058
8
  case Intrinsic::mips_mod_u_b:
2059
8
  case Intrinsic::mips_mod_u_h:
2060
8
  case Intrinsic::mips_mod_u_w:
2061
8
  case Intrinsic::mips_mod_u_d:
2062
8
    return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2063
8
                       Op->getOperand(2));
2064
8
  case Intrinsic::mips_mulv_b:
2065
8
  case Intrinsic::mips_mulv_h:
2066
8
  case Intrinsic::mips_mulv_w:
2067
8
  case Intrinsic::mips_mulv_d:
2068
8
    return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2069
8
                       Op->getOperand(2));
2070
8
  case Intrinsic::mips_msubv_b:
2071
8
  case Intrinsic::mips_msubv_h:
2072
8
  case Intrinsic::mips_msubv_w:
2073
8
  case Intrinsic::mips_msubv_d: {
2074
8
    EVT ResTy = Op->getValueType(0);
2075
8
    return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2076
8
                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2077
8
                                   Op->getOperand(2), Op->getOperand(3)));
2078
8
  }
2079
8
  case Intrinsic::mips_nlzc_b:
2080
8
  case Intrinsic::mips_nlzc_h:
2081
8
  case Intrinsic::mips_nlzc_w:
2082
8
  case Intrinsic::mips_nlzc_d:
2083
8
    return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2084
8
  case Intrinsic::mips_nor_v: {
2085
8
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2086
8
                              Op->getOperand(1), Op->getOperand(2));
2087
8
    return DAG.getNOT(DL, Res, Res->getValueType(0));
2088
8
  }
2089
8
  case Intrinsic::mips_nori_b: {
2090
5
    SDValue Res =  DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2091
5
                               Op->getOperand(1),
2092
5
                               lowerMSASplatImm(Op, 2, DAG));
2093
5
    return DAG.getNOT(DL, Res, Res->getValueType(0));
2094
8
  }
2095
8
  case Intrinsic::mips_or_v:
2096
8
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2097
8
                       Op->getOperand(2));
2098
8
  case Intrinsic::mips_ori_b:
2099
5
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2100
5
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2101
8
  case Intrinsic::mips_pckev_b:
2102
8
  case Intrinsic::mips_pckev_h:
2103
8
  case Intrinsic::mips_pckev_w:
2104
8
  case Intrinsic::mips_pckev_d:
2105
8
    return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2106
8
                       Op->getOperand(1), Op->getOperand(2));
2107
8
  case Intrinsic::mips_pckod_b:
2108
8
  case Intrinsic::mips_pckod_h:
2109
8
  case Intrinsic::mips_pckod_w:
2110
8
  case Intrinsic::mips_pckod_d:
2111
8
    return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2112
8
                       Op->getOperand(1), Op->getOperand(2));
2113
8
  case Intrinsic::mips_pcnt_b:
2114
8
  case Intrinsic::mips_pcnt_h:
2115
8
  case Intrinsic::mips_pcnt_w:
2116
8
  case Intrinsic::mips_pcnt_d:
2117
8
    return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2118
32
  case Intrinsic::mips_sat_s_b:
2119
32
  case Intrinsic::mips_sat_s_h:
2120
32
  case Intrinsic::mips_sat_s_w:
2121
32
  case Intrinsic::mips_sat_s_d:
2122
32
  case Intrinsic::mips_sat_u_b:
2123
32
  case Intrinsic::mips_sat_u_h:
2124
32
  case Intrinsic::mips_sat_u_w:
2125
32
  case Intrinsic::mips_sat_u_d: {
2126
32
    // Report an error for out of range values.
2127
32
    int64_t Max;
2128
32
    switch (Intrinsic) {
2129
32
    case Intrinsic::mips_sat_s_b:
2130
8
    case Intrinsic::mips_sat_u_b: Max = 7;  break;
2131
8
    case Intrinsic::mips_sat_s_h:
2132
8
    case Intrinsic::mips_sat_u_h: Max = 15; break;
2133
8
    case Intrinsic::mips_sat_s_w:
2134
8
    case Intrinsic::mips_sat_u_w: Max = 31; break;
2135
8
    case Intrinsic::mips_sat_s_d:
2136
8
    case Intrinsic::mips_sat_u_d: Max = 63; break;
2137
8
    
default: 0
llvm_unreachable0
("Unmatched intrinsic");
2138
32
    }
2139
32
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2140
32
    if (Value < 0 || Value > Max)
2141
0
      report_fatal_error("Immediate out of range");
2142
32
    return SDValue();
2143
32
  }
2144
32
  case Intrinsic::mips_shf_b:
2145
6
  case Intrinsic::mips_shf_h:
2146
6
  case Intrinsic::mips_shf_w: {
2147
6
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2148
6
    if (Value < 0 || Value > 255)
2149
0
      report_fatal_error("Immediate out of range");
2150
6
    return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2151
6
                       Op->getOperand(2), Op->getOperand(1));
2152
6
  }
2153
40
  case Intrinsic::mips_sldi_b:
2154
40
  case Intrinsic::mips_sldi_h:
2155
40
  case Intrinsic::mips_sldi_w:
2156
40
  case Intrinsic::mips_sldi_d: {
2157
40
    // Report an error for out of range values.
2158
40
    int64_t Max;
2159
40
    switch (Intrinsic) {
2160
40
    
case Intrinsic::mips_sldi_b: Max = 15; break10
;
2161
40
    
case Intrinsic::mips_sldi_h: Max = 7; break10
;
2162
40
    
case Intrinsic::mips_sldi_w: Max = 3; break10
;
2163
40
    
case Intrinsic::mips_sldi_d: Max = 1; break10
;
2164
40
    
default: 0
llvm_unreachable0
("Unmatched intrinsic");
2165
40
    }
2166
40
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2167
40
    if (Value < 0 || Value > Max)
2168
0
      report_fatal_error("Immediate out of range");
2169
40
    return SDValue();
2170
40
  }
2171
40
  case Intrinsic::mips_sll_b:
2172
20
  case Intrinsic::mips_sll_h:
2173
20
  case Intrinsic::mips_sll_w:
2174
20
  case Intrinsic::mips_sll_d:
2175
20
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2176
20
                       truncateVecElts(Op, DAG));
2177
36
  case Intrinsic::mips_slli_b:
2178
36
  case Intrinsic::mips_slli_h:
2179
36
  case Intrinsic::mips_slli_w:
2180
36
  case Intrinsic::mips_slli_d:
2181
36
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2182
36
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2183
36
  case Intrinsic::mips_splat_b:
2184
12
  case Intrinsic::mips_splat_h:
2185
12
  case Intrinsic::mips_splat_w:
2186
12
  case Intrinsic::mips_splat_d:
2187
12
    // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2188
12
    // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2189
12
    // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2190
12
    // Instead we lower to MipsISD::VSHF and match from there.
2191
12
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2192
12
                       lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2193
12
                       Op->getOperand(1));
2194
17
  case Intrinsic::mips_splati_b:
2195
17
  case Intrinsic::mips_splati_h:
2196
17
  case Intrinsic::mips_splati_w:
2197
17
  case Intrinsic::mips_splati_d:
2198
17
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2199
17
                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2200
17
                       Op->getOperand(1));
2201
20
  case Intrinsic::mips_sra_b:
2202
20
  case Intrinsic::mips_sra_h:
2203
20
  case Intrinsic::mips_sra_w:
2204
20
  case Intrinsic::mips_sra_d:
2205
20
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2206
20
                       truncateVecElts(Op, DAG));
2207
20
  case Intrinsic::mips_srai_b:
2208
20
  case Intrinsic::mips_srai_h:
2209
20
  case Intrinsic::mips_srai_w:
2210
20
  case Intrinsic::mips_srai_d:
2211
20
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2212
20
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2213
40
  case Intrinsic::mips_srari_b:
2214
40
  case Intrinsic::mips_srari_h:
2215
40
  case Intrinsic::mips_srari_w:
2216
40
  case Intrinsic::mips_srari_d: {
2217
40
    // Report an error for out of range values.
2218
40
    int64_t Max;
2219
40
    switch (Intrinsic) {
2220
40
    
case Intrinsic::mips_srari_b: Max = 7; break10
;
2221
40
    
case Intrinsic::mips_srari_h: Max = 15; break10
;
2222
40
    
case Intrinsic::mips_srari_w: Max = 31; break10
;
2223
40
    
case Intrinsic::mips_srari_d: Max = 63; break10
;
2224
40
    
default: 0
llvm_unreachable0
("Unmatched intrinsic");
2225
40
    }
2226
40
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2227
40
    if (Value < 0 || Value > Max)
2228
0
      report_fatal_error("Immediate out of range");
2229
40
    return SDValue();
2230
40
  }
2231
40
  case Intrinsic::mips_srl_b:
2232
20
  case Intrinsic::mips_srl_h:
2233
20
  case Intrinsic::mips_srl_w:
2234
20
  case Intrinsic::mips_srl_d:
2235
20
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2236
20
                       truncateVecElts(Op, DAG));
2237
36
  case Intrinsic::mips_srli_b:
2238
36
  case Intrinsic::mips_srli_h:
2239
36
  case Intrinsic::mips_srli_w:
2240
36
  case Intrinsic::mips_srli_d:
2241
36
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2242
36
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2243
40
  case Intrinsic::mips_srlri_b:
2244
40
  case Intrinsic::mips_srlri_h:
2245
40
  case Intrinsic::mips_srlri_w:
2246
40
  case Intrinsic::mips_srlri_d: {
2247
40
    // Report an error for out of range values.
2248
40
    int64_t Max;
2249
40
    switch (Intrinsic) {
2250
40
    
case Intrinsic::mips_srlri_b: Max = 7; break10
;
2251
40
    
case Intrinsic::mips_srlri_h: Max = 15; break10
;
2252
40
    
case Intrinsic::mips_srlri_w: Max = 31; break10
;
2253
40
    
case Intrinsic::mips_srlri_d: Max = 63; break10
;
2254
40
    
default: 0
llvm_unreachable0
("Unmatched intrinsic");
2255
40
    }
2256
40
    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2257
40
    if (Value < 0 || Value > Max)
2258
0
      report_fatal_error("Immediate out of range");
2259
40
    return SDValue();
2260
40
  }
2261
40
  case Intrinsic::mips_subv_b:
2262
8
  case Intrinsic::mips_subv_h:
2263
8
  case Intrinsic::mips_subv_w:
2264
8
  case Intrinsic::mips_subv_d:
2265
8
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2266
8
                       Op->getOperand(2));
2267
8
  case Intrinsic::mips_subvi_b:
2268
8
  case Intrinsic::mips_subvi_h:
2269
8
  case Intrinsic::mips_subvi_w:
2270
8
  case Intrinsic::mips_subvi_d:
2271
8
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2272
8
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2273
8
  case Intrinsic::mips_vshf_b:
2274
8
  case Intrinsic::mips_vshf_h:
2275
8
  case Intrinsic::mips_vshf_w:
2276
8
  case Intrinsic::mips_vshf_d:
2277
8
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2278
8
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2279
8
  case Intrinsic::mips_xor_v:
2280
8
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2281
8
                       Op->getOperand(2));
2282
8
  case Intrinsic::mips_xori_b:
2283
2
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2284
2
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2285
8
  case Intrinsic::thread_pointer: {
2286
4
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
2287
4
    return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2288
8
  }
2289
2.87k
  }
2290
2.87k
}
2291
2292
// Lower an MSA ld.[bhwd] intrinsic into an ordinary vector load from
// BasePtr + Offset with the MSA-mandated 16-byte alignment.
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue BasePtr = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = BasePtr->getValueType(0);

  // For N64 addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10, so widen the offset to the pointer
  // type before forming the effective address.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  SDValue EffAddr = DAG.getNode(ISD::ADD, DL, PtrTy, BasePtr, Offset);
  return DAG.getLoad(ResTy, DL, Chain, EffAddr, MachinePointerInfo(),
                     /* Alignment = */ 16);
}
2311
2312
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();

  // MSA loads get their own lowering. Every other intrinsic handled here
  // maps 1:1 onto a MipsISD DSP node, so translate the intrinsic ID into
  // the equivalent opcode and defer to lowerDSPIntr once.
  unsigned Opc;
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
  case Intrinsic::mips_extp:          Opc = MipsISD::EXTP;          break;
  case Intrinsic::mips_extpdp:        Opc = MipsISD::EXTPDP;        break;
  case Intrinsic::mips_extr_w:        Opc = MipsISD::EXTR_W;        break;
  case Intrinsic::mips_extr_r_w:      Opc = MipsISD::EXTR_R_W;      break;
  case Intrinsic::mips_extr_rs_w:     Opc = MipsISD::EXTR_RS_W;     break;
  case Intrinsic::mips_extr_s_h:      Opc = MipsISD::EXTR_S_H;      break;
  case Intrinsic::mips_mthlip:        Opc = MipsISD::MTHLIP;        break;
  case Intrinsic::mips_mulsaq_s_w_ph: Opc = MipsISD::MULSAQ_S_W_PH; break;
  case Intrinsic::mips_maq_s_w_phl:   Opc = MipsISD::MAQ_S_W_PHL;   break;
  case Intrinsic::mips_maq_s_w_phr:   Opc = MipsISD::MAQ_S_W_PHR;   break;
  case Intrinsic::mips_maq_sa_w_phl:  Opc = MipsISD::MAQ_SA_W_PHL;  break;
  case Intrinsic::mips_maq_sa_w_phr:  Opc = MipsISD::MAQ_SA_W_PHR;  break;
  case Intrinsic::mips_dpaq_s_w_ph:   Opc = MipsISD::DPAQ_S_W_PH;   break;
  case Intrinsic::mips_dpsq_s_w_ph:   Opc = MipsISD::DPSQ_S_W_PH;   break;
  case Intrinsic::mips_dpaq_sa_l_w:   Opc = MipsISD::DPAQ_SA_L_W;   break;
  case Intrinsic::mips_dpsq_sa_l_w:   Opc = MipsISD::DPSQ_SA_L_W;   break;
  case Intrinsic::mips_dpaqx_s_w_ph:  Opc = MipsISD::DPAQX_S_W_PH;  break;
  case Intrinsic::mips_dpaqx_sa_w_ph: Opc = MipsISD::DPAQX_SA_W_PH; break;
  case Intrinsic::mips_dpsqx_s_w_ph:  Opc = MipsISD::DPSQX_S_W_PH;  break;
  case Intrinsic::mips_dpsqx_sa_w_ph: Opc = MipsISD::DPSQX_SA_W_PH; break;
  }
  return lowerDSPIntr(Op, DAG, Opc);
}
2365
2366
// Lower an MSA st.[bhwd] intrinsic into an ordinary vector store of Value to
// BasePtr + Offset with the MSA-mandated 16-byte alignment.
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
                                 const MipsSubtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue BasePtr = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = BasePtr->getValueType(0);

  // For N64 addresses have the underlying type MVT::i64. This intrinsic
  // however takes an i32 signed constant offset. The actual type of the
  // intrinsic is a scaled signed i10, so widen the offset to the pointer
  // type before forming the effective address.
  if (Subtarget.isABI_N64())
    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);

  SDValue EffAddr = DAG.getNode(ISD::ADD, DL, PtrTy, BasePtr, Offset);

  return DAG.getStore(Chain, DL, Value, EffAddr, MachinePointerInfo(),
                      /* Alignment = */ 16);
}
2386
2387
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Only the MSA vector stores need custom lowering here; everything else
  // falls through to the default handling.
  const unsigned IntrinsicID =
      cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (IntrinsicID) {
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, IntrinsicID, Subtarget);
  default:
    return SDValue();
  }
}
2400
2401
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2402
//
2403
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2404
// choose to sign-extend but we could have equally chosen zero-extend. The
2405
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2406
// result into this node later (possibly changing it to a zero-extend in the
2407
// process).
2408
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(0);
  EVT VecTy = Vec->getValueType(0);
  EVT ResTy = Op->getValueType(0);

  // Only 128-bit (MSA-sized) vectors are handled here.
  if (!VecTy.is128BitVector())
    return SDValue();

  // Non-integer results are left as-is.
  if (!ResTy.isInteger())
    return Op;

  // The non-value bits are undefined; we arbitrarily sign-extend, and the
  // DAGCombiner may later fold a surrounding extension into this node.
  return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Vec,
                     Op->getOperand(1),
                     DAG.getValueType(VecTy.getVectorElementType()));
}
2427
2428
1.00k
static bool isConstantOrUndef(const SDValue Op) {
2429
1.00k
  if (Op->isUndef())
2430
88
    return true;
2431
920
  if (isa<ConstantSDNode>(Op))
2432
163
    return true;
2433
757
  if (isa<ConstantFPSDNode>(Op))
2434
24
    return true;
2435
733
  return false;
2436
733
}
2437
2438
439
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2439
1.17k
  for (unsigned i = 0; i < Op->getNumOperands(); 
++i733
)
2440
1.00k
    if (isConstantOrUndef(Op->getOperand(i)))
2441
275
      return true;
2442
439
  
return false164
;
2443
439
}
2444
2445
// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2446
// backend.
2447
//
2448
// Lowers according to the following rules:
2449
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2450
//   2 less than or equal to 64 and the value fits into a signed 10-bit
2451
//   immediate
2452
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2453
//   is a power of 2 less than or equal to 64 and the value does not fit into a
2454
//   signed 10-bit immediate
2455
// - Non-constant splats are legal as-is.
2456
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2457
// - All others are illegal and must be expanded.
2458
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Custom lowering only applies to MSA 128-bit vectors.
  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // Try to view the node as a constant splat of elements of at most 64 bits
  // (minimum splat element size of 8, honouring the target's endianness).
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget.isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value isn't an integer type we will have to bitcast
    // from an integer type first. Also, if there are any undefs, we must
    // lower them to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs)
      return Op;

    // Otherwise rebuild the splat as a constant of an equivalent integer
    // vector type, then bitcast back to the requested type.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
    // Non-constant splats are legal as-is.
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, MVT::i32));
    }
    return Vector;
  }

  // Constant BUILD_VECTORs that are not splats must be expanded.
  return SDValue();
}
2533
2534
// Lower VECTOR_SHUFFLE into SHF (if possible).
2535
//
2536
// SHF splits the vector into blocks of four elements, then shuffles these
2537
// elements according to a <4 x i2> constant (encoded as an integer immediate).
2538
//
2539
// It is therefore possible to lower into SHF when the mask takes the form:
2540
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2541
// When undef's appear they are treated as if they were whatever value is
2542
// necessary in order to fit the above forms.
2543
//
2544
// For example:
2545
//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2546
//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2547
//                                 i32 7, i32 6, i32 5, i32 4>
2548
// is lowered to:
2549
//   (SHF_H $w0, $w1, 27)
2550
// where the 27 comes from:
2551
//   3 + (2 << 2) + (1 << 4) + (0 << 6)
2552
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
                                       SmallVector<int, 16> Indices,
                                       SelectionDAG &DAG) {
  // SHFIndices[i] is the sub-vector index (0..3) that every result lane
  // congruent to i (mod 4) must select; -1 means not-yet-determined/undef.
  int SHFIndices[4] = { -1, -1, -1, -1 };

  // SHF operates on groups of four elements, so smaller masks can't match.
  if (Indices.size() < 4)
    return SDValue();

  for (unsigned i = 0; i < 4; ++i) {
    // Walk every lane congruent to i modulo 4.
    for (unsigned j = i; j < Indices.size(); j += 4) {
      int Idx = Indices[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SHFIndices[i] == -1)
        SHFIndices[i] = Idx;

      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      if (!(Idx == -1 || Idx == SHFIndices[i]))
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  // The immediate packs the four 2-bit indices little-end first:
  // SHFIndices[0] ends up in the lowest two bits.
  APInt Imm(32, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SHFIndices[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  // Note the operand order: the immediate comes first on the SHF node.
  SDLoc DL(Op);
  return DAG.getNode(MipsISD::SHF, DL, ResTy,
                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
}
2600
2601
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
///
/// Starting at \p Begin, every \p CheckStride'th element up to \p End must be
/// either -1 (treated as a wildcard/undef, always acceptable) or equal to a
/// running expected value that starts at \p ExpectedIndex and advances by
/// \p ExpectedIndexStride for each checked position.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  // I aliases the by-value Begin parameter, so advancing I walks the range.
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }

  return true;
}
2623
2624
// Determine whether VECTOR_SHUFFLE is a SPLATI.
2625
//
2626
// It is a SPLATI when the mask is:
2627
//   <x, x, x, ...>
2628
// where x is any valid index.
2629
//
2630
// When undef's appear in the mask they are treated as if they were whatever
2631
// value is necessary in order to fit the above form.
2632
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2633
                                    SmallVector<int, 16> Indices,
2634
206
                                    SelectionDAG &DAG) {
2635
206
  assert((Indices.size() % 2) == 0);
2636
206
2637
206
  int SplatIndex = -1;
2638
208
  for (const auto &V : Indices) {
2639
208
    if (V != -1) {
2640
206
      SplatIndex = V;
2641
206
      break;
2642
206
    }
2643
208
  }
2644
206
2645
206
  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2646
206
                                 0);
2647
206
}
2648
2649
// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2650
//
2651
// ILVEV interleaves the even elements from each vector.
2652
//
2653
// It is possible to lower into ILVEV when the mask consists of two of the
2654
// following forms interleaved:
2655
//   <0, 2, 4, ...>
2656
//   <n, n+2, n+4, ...>
2657
// where n is the number of elements in the vector.
2658
// For example:
2659
//   <0, 0, 2, 2, 4, 4, ...>
2660
//   <0, n, 2, n+2, 4, n+4, ...>
2661
//
2662
// When undef's appear in the mask they are treated as if they were whatever
2663
// value is necessary in order to fit the above forms.
2664
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  // For one lane parity (0 = even result lanes, 1 = odd result lanes),
  // decide which input vector supplies the even source elements, or return
  // a null SDValue if neither input fits the pattern.
  auto PickSource = [&](unsigned Parity) -> SDValue {
    const auto Start = Indices.begin() + Parity;
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), 0, 2))
      return Op->getOperand(0);
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), Indices.size(), 2))
      return Op->getOperand(1);
    return SDValue();
  };

  SDValue Wt = PickSource(0);
  if (!Wt)
    return SDValue();

  SDValue Ws = PickSource(1);
  if (!Ws)
    return SDValue();

  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
}
2694
2695
// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2696
//
2697
// ILVOD interleaves the odd elements from each vector.
2698
//
2699
// It is possible to lower into ILVOD when the mask consists of two of the
2700
// following forms interleaved:
2701
//   <1, 3, 5, ...>
2702
//   <n+1, n+3, n+5, ...>
2703
// where n is the number of elements in the vector.
2704
// For example:
2705
//   <1, 1, 3, 3, 5, 5, ...>
2706
//   <1, n+1, 3, n+3, 5, n+5, ...>
2707
//
2708
// When undef's appear in the mask they are treated as if they were whatever
2709
// value is necessary in order to fit the above forms.
2710
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
                                         SmallVector<int, 16> Indices,
                                         SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  // For one lane parity (0 = even result lanes, 1 = odd result lanes),
  // decide which input vector supplies the odd source elements, or return
  // a null SDValue if neither input fits the pattern.
  auto PickSource = [&](unsigned Parity) -> SDValue {
    const auto Start = Indices.begin() + Parity;
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), 1, 2))
      return Op->getOperand(0);
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), Indices.size() + 1,
                                2))
      return Op->getOperand(1);
    return SDValue();
  };

  SDValue Wt = PickSource(0);
  if (!Wt)
    return SDValue();

  SDValue Ws = PickSource(1);
  if (!Ws)
    return SDValue();

  // Note: ILVOD takes its operands in (Wt, Ws) order, unlike ILVEV.
  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
}
2740
2741
// Lower VECTOR_SHUFFLE into ILVR (if possible).
2742
//
2743
// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2744
// each vector.
2745
//
2746
// It is possible to lower into ILVR when the mask consists of two of the
2747
// following forms interleaved:
2748
//   <0, 1, 2, ...>
2749
//   <n, n+1, n+2, ...>
2750
// where n is the number of elements in the vector.
2751
// For example:
2752
//   <0, 0, 1, 1, 2, 2, ...>
2753
//   <0, n, 1, n+1, 2, n+2, ...>
2754
//
2755
// When undef's appear in the mask they are treated as if they were whatever
2756
// value is necessary in order to fit the above forms.
2757
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
                                        SmallVector<int, 16> Indices,
                                        SelectionDAG &DAG) {
  assert((Indices.size() % 2) == 0);

  // For one lane parity (0 = even result lanes, 1 = odd result lanes),
  // decide which input vector supplies the right (lowest-indexed) half's
  // consecutive elements, or return a null SDValue if neither input fits.
  auto PickSource = [&](unsigned Parity) -> SDValue {
    const auto Start = Indices.begin() + Parity;
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), 0, 1))
      return Op->getOperand(0);
    if (fitsRegularPattern<int>(Start, 2, Indices.end(), Indices.size(), 1))
      return Op->getOperand(1);
    return SDValue();
  };

  SDValue Wt = PickSource(0);
  if (!Wt)
    return SDValue();

  SDValue Ws = PickSource(1);
  if (!Ws)
    return SDValue();

  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
}
2787
2788
// Lower VECTOR_SHUFFLE into ILVL (if possible).
2789
//
2790
// ILVL interleaves consecutive elements from the left (highest-indexed) half
2791
// of each vector.
2792
//
2793
// It is possible to lower into ILVL when the mask consists of two of the
2794
// following forms interleaved:
2795
//   <x, x+1, x+2, ...>
2796
//   <n+x, n+x+1, n+x+2, ...>
2797
// where n is the number of elements in the vector and x is half n.
2798
// For example:
2799
//   <x, x, x+1, x+1, x+2, x+2, ...>
2800
//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2801
//
2802
// When undef's appear in the mask they are treated as if they were whatever
2803
// value is necessary in order to fit the above forms.
2804
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
                                        const SmallVectorImpl<int> &Indices,
                                        SelectionDAG &DAG) {
  // \p Indices is the shuffle mask for \p Op. It is only inspected, so take
  // it by const reference instead of copying the whole mask for every
  // attempted lowering.
  assert((Indices.size() % 2) == 0);

  unsigned HalfSize = Indices.size() / 2;
  SDValue Wt;
  SDValue Ws;
  // Iterators are cheap values; no need to bind references to temporaries.
  const auto Begin = Indices.begin();
  const auto End = Indices.end();

  // Check even elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // Check odd elements are taken from the left (highest-indexed) elements of
  // one half or the other and pick an operand accordingly.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
                                   1))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  // Wt was chosen from the even mask positions and Ws from the odd ones;
  // ILVL takes them in (Ws, Wt) order.
  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
}
2836
2837
// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2838
//
2839
// PCKEV copies the even elements of each vector into the result vector.
2840
//
2841
// It is possible to lower into PCKEV when the mask consists of two of the
2842
// following forms concatenated:
2843
//   <0, 2, 4, ...>
2844
//   <n, n+2, n+4, ...>
2845
// where n is the number of elements in the vector.
2846
// For example:
2847
//   <0, 2, 4, ..., 0, 2, 4, ...>
2848
//   <0, 2, 4, ..., n, n+2, n+4, ...>
2849
//
2850
// When undef's appear in the mask they are treated as if they were whatever
2851
// value is necessary in order to fit the above forms.
2852
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
                                         const SmallVectorImpl<int> &Indices,
                                         SelectionDAG &DAG) {
  // \p Indices is the shuffle mask for \p Op. It is only inspected, so take
  // it by const reference instead of copying the whole mask for every
  // attempted lowering.
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  // Iterators are cheap values; no need to bind references to temporaries.
  const auto Begin = Indices.begin();
  const auto Mid = Indices.begin() + Indices.size() / 2;
  const auto End = Indices.end();

  // The first half of the mask must be the even elements of one operand.
  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // The second half of the mask must be the even elements of one operand.
  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
}
2879
2880
// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2881
//
2882
// PCKOD copies the odd elements of each vector into the result vector.
2883
//
2884
// It is possible to lower into PCKOD when the mask consists of two of the
2885
// following forms concatenated:
2886
//   <1, 3, 5, ...>
2887
//   <n+1, n+3, n+5, ...>
2888
// where n is the number of elements in the vector.
2889
// For example:
2890
//   <1, 3, 5, ..., 1, 3, 5, ...>
2891
//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
2892
//
2893
// When undef's appear in the mask they are treated as if they were whatever
2894
// value is necessary in order to fit the above forms.
2895
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
                                         const SmallVectorImpl<int> &Indices,
                                         SelectionDAG &DAG) {
  // \p Indices is the shuffle mask for \p Op. It is only inspected, so take
  // it by const reference instead of copying the whole mask for every
  // attempted lowering.
  assert((Indices.size() % 2) == 0);

  SDValue Wt;
  SDValue Ws;
  // Iterators are cheap values; no need to bind references to temporaries.
  const auto Begin = Indices.begin();
  const auto Mid = Indices.begin() + Indices.size() / 2;
  const auto End = Indices.end();

  // The first half of the mask must be the odd elements of one operand.
  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    Wt = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
    Wt = Op->getOperand(1);
  else
    return SDValue();

  // The second half of the mask must be the odd elements of one operand.
  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    Ws = Op->getOperand(0);
  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
    Ws = Op->getOperand(1);
  else
    return SDValue();

  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
}
2922
2923
// Lower VECTOR_SHUFFLE into VSHF.
2924
//
2925
// This mostly consists of converting the shuffle indices in Indices into a
2926
// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2927
// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2928
// if the type is v8i16 and all the indices are less than 8 then the second
2929
// operand is unused and can be replaced with anything. We choose to replace it
2930
// with the used operand since this reduces the number of instructions overall.
2931
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        const SmallVectorImpl<int> &Indices,
                                        SelectionDAG &DAG) {
  // \p Indices is the shuffle mask for \p Op. It is only inspected, so take
  // it by const reference instead of copying the whole mask.
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  // Work out which of the two input vectors the mask actually references so
  // an unused operand can be replaced below.
  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF and references neither vector.
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }

  // Materialize the mask as a constant vector operand for VSHF.
  for (int Idx : Indices)
    Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    // Only one operand is used; feed it to both inputs so the other SDValue
    // becomes dead and can be eliminated.
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}
2979
2980
// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2981
// indices in the shuffle.
2982
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  // Only 128-bit (MSA-sized) vectors are handled here.
  if (!ResTy.is128BitVector())
    return SDValue();

  // Collect the shuffle mask; -1 marks UNDEF lanes.
  int NumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;
  for (int Elt = 0; Elt < NumElts; ++Elt)
    Indices.push_back(Node->getMaskElt(Elt));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);

  // Try each dedicated MSA shuffle instruction in turn, most specific first.
  if (SDValue V = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))
    return V;

  // No dedicated instruction matched; fall back to the general vshf.
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
3017
3018
// Expand the BPOSGE32 pseudo into a branch diamond that materializes the
// bposge32 branch condition as a 0/1 value in a GPR32 virtual register.
MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Insert the three new blocks immediately after BB, in layout order.
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bposge32 instruction to $BB.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
  // Insert the real bposge32c instruction to $BB.
  // NOTE(review): both the standard BPOSGE32 and the microMIPSr3
  // BPOSGE32C_MMR3 branch are emitted unconditionally here rather than
  // selecting one based on the subtarget mode — confirm this is intended.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);

  // Fill $FBB: fall-through (condition false) produces 0.
  unsigned VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB: branch taken (condition true) produces 1.
  unsigned VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink to merge the two results.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(VR2)
      .addMBB(FBB)
      .addReg(VR1)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
3086
3087
// Expand an MSA compare-branch pseudo (e.g. vany_nonzero) into a branch
// diamond that materializes the branch condition as a 0/1 GPR32 value.
// \p BranchOp is the real MSA branch opcode to emit (e.g. bnz.b).
MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Insert the three new blocks immediately after BB, in layout order.
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
      .addReg(MI.getOperand(1).getReg())
      .addMBB(TBB);

  // Fill $FBB: fall-through (condition false) produces 0.
  unsigned RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB: branch taken (condition true) produces 1.
  unsigned RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink to merge the two results.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FBB)
      .addReg(RD2)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
3155
3156
// Emit the COPY_FW pseudo instruction.
3157
//
3158
// copy_fw_pseudo $fd, $ws, n
3159
// =>
3160
// copy_u_w $rt, $ws, $n
3161
// mtc1     $rt, $fd
3162
//
3163
// When n is zero, the equivalent operation can be performed with (potentially)
3164
// zero instructions due to register overlaps. This optimization is never valid
3165
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3166
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Fd = MI.getOperand(0).getReg();   // destination FPR
  unsigned Ws = MI.getOperand(1).getReg();   // source MSA register
  unsigned Lane = MI.getOperand(2).getImm(); // lane to extract

  if (Lane == 0) {
    // Lane 0 overlaps the single-precision sub-register, so a subreg copy
    // suffices; no extract instruction is needed.
    unsigned Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    // Non-zero lane: splat the wanted lane into lane 0 of a temporary, then
    // copy its single-precision sub-register out.
    unsigned Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
                                  &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3199
3200
// Emit the COPY_FD pseudo instruction.
3201
//
3202
// copy_fd_pseudo $fd, $ws, n
3203
// =>
3204
// splati.d $wt, $ws, $n
3205
// copy $fd, $wt:sub_64
3206
//
3207
// When n is zero, the equivalent operation can be performed with (potentially)
3208
// zero instructions due to register overlaps. This optimization is always
3209
// valid because FR=1 mode which is the only supported mode in MSA.
3210
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  // MSA requires FR=1 mode, which implies a 64-bit FPU.
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd = MI.getOperand(0).getReg();  // destination FPR
  unsigned Ws = MI.getOperand(1).getReg();  // source MSA register
  // The immediate is scaled by 2 here; for a two-lane (d-format) vector the
  // result is 0 for lane 0 and non-zero for lane 1.
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    // Lane 0 overlaps the 64-bit sub-register; a plain subreg copy suffices.
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    // Splat lane 1 into lane 0 of a temporary, then copy its 64-bit
    // sub-register out.
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3234
3235
// Emit the INSERT_FW pseudo instruction.
3236
//
3237
// insert_fw_pseudo $wd, $wd_in, $n, $fs
3238
// =>
3239
// subreg_to_reg $wt:sub_lo, $fs
3240
// insve_w $wd[$n], $wd_in, $wt[0]
3241
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();    // result vector
  unsigned Wd_in = MI.getOperand(1).getReg(); // vector being updated
  unsigned Lane = MI.getOperand(2).getImm();  // destination lane
  unsigned Fs = MI.getOperand(3).getReg();    // f32 value to insert
  // Without odd single-precision registers the MSA register must be
  // even-numbered so its sub_lo single-precision subreg is valid.
  unsigned Wt = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
                                &Mips::MSA128WEvensRegClass);

  // View the f32 value as lane 0 of a 128-bit vector (no code emitted for
  // SUBREG_TO_REG itself), then insve it into the requested lane.
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3268
3269
// Emit the INSERT_FD pseudo instruction.
3270
//
3271
// insert_fd_pseudo $wd, $fs, n
3272
// =>
3273
// subreg_to_reg $wt:sub_64, $fs
3274
// insve_d $wd[$n], $wd_in, $wt[0]
3275
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  // MSA requires FR=1 mode, which implies a 64-bit FPU.
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();    // result vector
  unsigned Wd_in = MI.getOperand(1).getReg(); // vector being updated
  unsigned Lane = MI.getOperand(2).getImm();  // destination lane
  unsigned Fs = MI.getOperand(3).getReg();    // f64 value to insert
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // View the f64 value as lane 0 of a 128-bit vector (no code emitted for
  // SUBREG_TO_REG itself), then insve it into the requested lane.
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3302
3303
// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3304
//
3305
// For integer:
3306
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3307
// =>
3308
// (SLL $lanetmp1, $lane, <log2size)
3309
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3310
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3311
// (NEG $lanetmp2, $lanetmp1)
3312
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
3313
//
3314
// For floating point:
3315
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3316
// =>
3317
// (SUBREG_TO_REG $wt, $fs, <subreg>)
3318
// (SLL $lanetmp1, $lane, <log2size)
3319
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3320
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3321
// (NEG $lanetmp2, $lanetmp1)
3322
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
3323
// Insert a value into a vector lane selected by a *register* (variable
// index): rotate the wanted lane down to element zero, insert there, then
// rotate back. See the expansion sketch above the function.
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();        // result vector
  unsigned SrcVecReg = MI.getOperand(1).getReg(); // vector being updated
  unsigned LaneReg = MI.getOperand(2).getReg();   // variable lane index
  unsigned SrcValReg = MI.getOperand(3).getReg(); // value to insert

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  // On N64 the lane index lives in a 64-bit GPR; sld.b/dsub consume its
  // 32-bit sub-register.
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Pick the element-width-specific opcodes and register class.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Floating-point source: first view the FPR as lane 0 of an MSA vector.
    unsigned Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index (sld.b shifts by bytes).
  if (EltSizeInBytes != 1) {
    unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3429
3430
// Emit the FILL_FW pseudo instruction.
3431
//
3432
// fill_fw_pseudo $wd, $fs
3433
// =>
3434
// implicit_def $wt1
3435
// insert_subreg $wt2:subreg_lo, $wt1, $fs
3436
// splati.w $wd, $wt2[0]
3437
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg(); // result vector
  unsigned Fs = MI.getOperand(1).getReg(); // f32 value to broadcast
  // Without odd single-precision registers the MSA registers must be
  // even-numbered so the sub_lo single-precision subreg is valid.
  unsigned Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  // Place the f32 value in lane 0 of a vector (the other lanes are
  // undefined), then splat lane 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3462
3463
// Emit the FILL_FD pseudo instruction.
3464
//
3465
// fill_fd_pseudo $wd, $fs
3466
// =>
3467
// implicit_def $wt1
3468
// insert_subreg $wt2:subreg_64, $wt1, $fs
3469
// splati.d $wd, $wt2[0]
3470
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  // MSA requires FR=1 mode, which implies a 64-bit FPU.
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg(); // result vector
  unsigned Fs = MI.getOperand(1).getReg(); // f64 value to broadcast
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Place the f64 value in lane 0 of a vector (the other lane is undefined),
  // then splat lane 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3493
3494
// Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA
3495
// register.
3496
//
3497
// STF16 MSA128F16:$wd, mem_simm10:$addr
3498
// =>
3499
//  copy_u.h $rtemp,$wd[0]
3500
//  sh $rtemp, $addr
3501
//
3502
// Safety: We can't use st.h & co as they would over write the memory after
3503
// the destination. It would require half floats be allocated 16 bytes(!) of
3504
// space.
3505
MachineBasicBlock *
MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
                                       MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Ws = MI.getOperand(0).getReg(); // MSA register holding the f16
  unsigned Rt = MI.getOperand(1).getReg(); // base address register
  const MachineMemOperand &MMO = **MI.memoperands_begin();
  // The store offset is carried by the memory operand, not an MI operand.
  unsigned Imm = MMO.getOffset();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *RC =
      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);
  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
  unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);

  // Extract the raw 16-bit value (zero-extended) from lane 0 into a GPR32.
  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
  if(!UsingMips32) {
    // The 64-bit store form needs a GPR64 source; widen the GPR32 value.
    unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
        .addImm(0)
        .addReg(Rs)
        .addImm(Mips::sub_32);
    Rs = Tmp;
  }
  // Store exactly two bytes (sh/sh64), preserving the memory operand so
  // aliasing information is kept.
  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
      .addReg(Rs)
      .addReg(Rt)
      .addImm(Imm)
      .addMemOperand(BB->getParent()->getMachineMemOperand(
          &MMO, MMO.getOffset(), MMO.getSize()));

  MI.eraseFromParent();
  return BB;
}
3546
3547
// Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register.
3548
//
3549
// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3550
// =>
3551
//  lh $rtemp, $addr
3552
//  fill.h $wd, $rtemp
3553
//
3554
// Safety: We can't use ld.h & co as they over-read from the source.
3555
// Additionally, if the address is not modulo 16, 2 cases can occur:
3556
//  a) Segmentation fault as the load instruction reads from a memory page
3557
//     memory it's not supposed to.
3558
//  b) The load crosses an implementation specific boundary, requiring OS
3559
//     intervention.
3560
MachineBasicBlock *
MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
                                       MachineBasicBlock *BB) const {

  const TargetInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  DebugLoc Loc = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();

  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
  //          spill and reload can expand as a GPR64 operand. Examine the
  //          operand in detail and default to ABI.
  const TargetRegisterClass *AddrRC =
      MI.getOperand(1).isReg() ? MRI.getRegClass(MI.getOperand(1).getReg())
                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
                                                        : &Mips::GPR64RegClass);

  const bool Is32BitGPR = AddrRC == &Mips::GPR32RegClass;
  unsigned LoadedGPR = MRI.createVirtualRegister(AddrRC);

  // Emit the halfword load into a GPR, forwarding the pseudo's address
  // operands (register/immediate/frame-index) verbatim.
  MachineInstrBuilder LoadMIB = BuildMI(
      *BB, MI, Loc, InstrInfo->get(Is32BitGPR ? Mips::LH : Mips::LH64),
      LoadedGPR);
  for (unsigned OpNo = 1, OpEnd = MI.getNumOperands(); OpNo != OpEnd; ++OpNo)
    LoadMIB.add(MI.getOperand(OpNo));

  // FILL_H consumes a 32-bit GPR; narrow a 64-bit result to its low half.
  if (!Is32BitGPR) {
    unsigned Lo32 = MRI.createVirtualRegister(&Mips::GPR32RegClass);
    BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::COPY), Lo32)
        .addReg(LoadedGPR, 0, Mips::sub_32);
    LoadedGPR = Lo32;
  }

  // Replicate the loaded halfword into every lane of the MSA destination.
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::FILL_H), Wd).addReg(LoadedGPR);

  MI.eraseFromParent();
  return BB;
}
3596
3597
// Emit the FPROUND_PSEUDO instruction.
3598
//
3599
// Round an FGR64Opnd, FGR32Opnd to an f16.
3600
//
3601
// Safety: Cycle the operand through the GPRs so the result always ends up
3602
//         the correct MSA register.
3603
//
3604
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3605
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3606
//        (which they can be, as the MSA registers are defined to alias the
3607
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
3608
//        the correct register class. That requires operands be tie-able across
3609
//        register classes which have a sub/super register class relationship.
3610
//
3611
// For FPG32Opnd:
3612
//
3613
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3614
// =>
3615
//  mfc1 $rtemp, $fs
3616
//  fill.w $rtemp, $wtemp
3617
//  fexdo.w $wd, $wtemp, $wtemp
3618
//
3619
// For FPG64Opnd on mips32r2+:
3620
//
3621
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3622
// =>
3623
//  mfc1 $rtemp, $fs
3624
//  fill.w $rtemp, $wtemp
3625
//  mfhc1 $rtemp2, $fs
3626
//  insert.w $wtemp[1], $rtemp2
3627
//  insert.w $wtemp[3], $rtemp2
3628
//  fexdo.w $wtemp2, $wtemp, $wtemp
3629
//  fexdo.h $wd, $temp2, $temp2
3630
//
3631
// For FGR64Opnd on mips64r2+:
3632
//
3633
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3634
// =>
3635
//  dmfc1 $rtemp, $fs
3636
//  fill.d $rtemp, $wtemp
3637
//  fexdo.w $wtemp2, $wtemp, $wtemp
3638
//  fexdo.h $wd, $wtemp2, $wtemp2
3639
//
3640
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3641
//              undef bits are "just right" and the exception enable bits are
3642
//              set. By using fill.w to replicate $fs into all elements over
3643
//              insert.w for one element, we avoid that potential case. If
3644
//              fexdo.[hw] causes an exception in, the exception is valid and it
3645
//              occurs for all elements.
3646
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  // Two axes select the expansion: source width (IsFGR64) and whether a
  // 64-bit FPR can be read with one GPR move (mips64) or needs a lo/hi
  // pair of 32-bit moves (mips32r2+). See the comment block above.
  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Wd = MI.getOperand(0).getReg();  // MSA f16 destination.
  unsigned Fs = MI.getOperand(1).getReg();  // FGR32/FGR64 source.

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  // Pick the GPR class and move/fill opcodes matching the source width.
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  // WPHI threads the "current" vector value through the optional stages
  // below; each stage reads the previous stage's result.
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    // The fill above only replicated the low 32 bits of $fs. Fetch the high
    // half with mfhc1 and insert it at the odd word lanes (1 and 3) so lanes
    // {0,1} and {2,3} each hold the full double.
    unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
        .addReg(Wtemp)
        .addReg(Rtemp2)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
        .addReg(Wtemp2)
        .addReg(Rtemp2)
        .addImm(3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    // f64 -> f16 needs two rounding steps: first fexdo.w (f64 -> f32), then
    // the common fexdo.h below (f32 -> f16).
    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
        .addReg(WPHI)
        .addReg(WPHI);
    WPHI = Wtemp2;
  }

  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);

  MI.eraseFromParent();
  return BB;
}
3708
3709
// Emit the FPEXTEND_PSEUDO instruction.
3710
//
3711
// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3712
//
3713
// Safety: Cycle the result through the GPRs so the result always ends up
3714
//         the correct floating point register.
3715
//
3716
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3717
//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3718
//        (which they can be, as the MSA registers are defined to alias the
3719
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
3720
//        the correct register class. That requires operands be tie-able across
3721
//        register classes which have a sub/super register class relationship. I
3722
//        haven't checked.
3723
//
3724
// For FGR32Opnd:
3725
//
3726
// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3727
// =>
3728
//  fexupr.w $wtemp, $ws
3729
//  copy_s.w $rtemp, $ws[0]
3730
//  mtc1 $rtemp, $fd
3731
//
3732
// For FGR64Opnd on Mips64:
3733
//
3734
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3735
// =>
3736
//  fexupr.w $wtemp, $ws
3737
//  fexupr.d $wtemp2, $wtemp
3738
//  copy_s.d $rtemp, $wtemp2[0]
3739
//  dmtc1 $rtemp, $fd
3740
//
3741
// For FGR64Opnd on Mips32:
3742
//
3743
// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3744
// =>
3745
//  fexupr.w $wtemp, $ws
3746
//  fexupr.d $wtemp2, $wtemp
3747
//  copy_s.w $rtemp, $wtemp2[0]
3748
//  mtc1 $rtemp, $ftemp
3749
//  copy_s.w $rtemp2, $wtemp2[1]
3750
//  $fd = mthc1 $rtemp2, $ftemp
3751
MachineBasicBlock *
MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  // Two axes select the expansion: destination width (IsFGR64) and whether
  // a 64-bit FPR can be written with one GPR move (mips64) or needs a lo/hi
  // pair (mips32r2+). See the comment block above.
  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();  // FGR32/FGR64 destination.
  Register Ws = MI.getOperand(1).getReg();  // MSA f16 source.

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MTC1Opc = IsFGR64onMips64
                         ? Mips::DMTC1
                         : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
  // Note: an opcode is a plain unsigned value, not a Register. The previous
  // code declared COPYOpc as Register, conflating the opcode and register
  // number spaces; use unsigned, matching MTC1Opc above.
  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;

  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  // WPHI is the vector holding the fully-widened value to extract from.
  Register WPHI = Wtemp;

  // f16 -> f32 in every lane; widen once more to f64 if requested.
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
  if (IsFGR64) {
    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
  }

  // Perform the safety regclass copy mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  // On mips32 with an f64 destination the single mtc1 below only writes the
  // low half, so stage into a temporary FPR and complete it with mthc1.
  Register FPRPHI = IsFGR64onMips32
                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                        : Fd;
  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);

  if (IsFGR64onMips32) {
    // Move the high word (element 1) into the upper half of $fd via mthc1.
    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
        .addReg(WPHI)
        .addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
        .addReg(FPRPHI)
        .addReg(Rtemp2);
  }

  MI.eraseFromParent();
  return BB;
}
3807
3808
// Emit the FEXP2_W_1 pseudo instructions.
3809
//
3810
// fexp2_w_1_pseudo $wd, $wt
3811
// =>
3812
// ldi.w $ws, 1
3813
// fexp2.w $wd, $ws, $wt
3814
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const TargetRegisterClass *VecRC = &Mips::MSA128WRegClass;
  DebugLoc Loc = MI.getDebugLoc();
  unsigned IntOnes = MRI.createVirtualRegister(VecRC);
  unsigned FPOnes = MRI.createVirtualRegister(VecRC);

  // Build a vector of 1.0f: splat integer 1, then convert to float.
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::LDI_W), IntOnes).addImm(1);
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::FFINT_U_W), FPOnes)
      .addReg(IntOnes);

  // $wd = 1.0 * 2^$wt, element-wise.
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::FEXP2_W),
          MI.getOperand(0).getReg())
      .addReg(FPOnes)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3836
3837
// Emit the FEXP2_D_1 pseudo instructions.
3838
//
3839
// fexp2_d_1_pseudo $wd, $wt
3840
// =>
3841
// ldi.d $ws, 1
3842
// fexp2.d $wd, $ws, $wt
3843
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const TargetRegisterClass *VecRC = &Mips::MSA128DRegClass;
  DebugLoc Loc = MI.getDebugLoc();
  unsigned IntOnes = MRI.createVirtualRegister(VecRC);
  unsigned FPOnes = MRI.createVirtualRegister(VecRC);

  // Build a vector of 1.0: splat integer 1, then convert to double.
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::LDI_D), IntOnes).addImm(1);
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::FFINT_U_D), FPOnes)
      .addReg(IntOnes);

  // $wd = 1.0 * 2^$wt, element-wise.
  BuildMI(*BB, MI, Loc, InstrInfo->get(Mips::FEXP2_D),
          MI.getOperand(0).getReg())
      .addReg(FPOnes)
      .addReg(MI.getOperand(1).getReg());

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}