Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction().hasOptSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
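
// A standalone sketch (not part of the original source) of the immediate test
// above: an ADD/SUB immediate is legal iff it fits in 12 bits, optionally
// shifted left by 12. Helper name is hypothetical.
#include <cstdint>

static bool isLegalAddSubImmSketch(uint64_t Imm) {
  if ((Imm >> 12) == 0)
    return true;                                 // #imm, LSL #0
  return (Imm & 0xfff) == 0 && (Imm >> 24) == 0; // #imm, LSL #12
}
// isLegalAddSubImmSketch(0xfff)    -> true  (add x0, x1, #0xfff)
// isLegalAddSubImmSketch(0x123000) -> true  (add x0, x1, #0x123, lsl #12)
// isLegalAddSubImmSketch(0x1001)   -> false (needs a separate MOV first)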

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}
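
// A worked instance of the negate-and-retry step above (standalone sketch,
// hypothetical helper name): an i32 add of -5 is re-selected as a sub of +5.
#include <cstdint>

static bool negArithImmedExample() {
  uint64_t Immed = 0xfffffffbULL;               // -5, zero-extended from i32
  Immed = ~((uint32_t)Immed) + 1;               // 32-bit two's-complement negate
  return Immed == 5 &&                          // now a plain 12-bit immediate
         (Immed & 0xFFFFFFFFFF000000ULL) == 0;  // and within the 24-bit window
}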

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (ForCodeSize || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
/// is not shifted, set the Shift operand to default of "LSL 0".  The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not.  The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}
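
// The ShVal built above packs shift kind and amount into one immediate; a
// sketch of the layout, assumed (not verified here) to mirror
// AArch64_AM::getShifterImm:
static unsigned encodeShifterSketch(unsigned KindEnc /*LSL=0,LSR=1,ASR=2,ROR=3*/,
                                    unsigned Amount /*0..63*/) {
  return (KindEnc << 6) | (Amount & 0x3f);
}
// e.g. encodeShifterSketch(0, 12) would describe an "LSL #12" operand.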

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}
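
// The kind of expression the AND cases above recognize, illustrated on plain
// C++ (the exact assembly is compiler-dependent; typically something like
// "add x0, x0, w1, uxtb", absorbing the 0xff mask into the operand's extend):
#include <cstdint>

static uint64_t addMaskedByte(uint64_t a, uint64_t b) {
  return a + (b & 0xff); // the AND becomes a UXTB extended-register operand
}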

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
// a high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

/// SelectArithExtendedRegister - Select an "extended register" operand.  This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from.  Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program.  We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}
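
// What the extended-register fold buys, illustrated on plain C++ (the exact
// assembly is compiler-dependent; typically a single
// "add x0, x0, w1, sxtw #2" instead of a separate sxtw plus shifted add):
#include <cstdint>

static int64_t addScaledIndex(int64_t a, int32_t i) {
  return a + (int64_t)i * 4; // sign-extend and *4 fold into the add's operand
}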

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address.  The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
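
// Standalone restatement (hypothetical helper name) of the signed branch
// above: a BW-bit signed, Size-scaled offset must be Size-aligned and lie in
// [-2^(BW-1) * Size, 2^(BW-1) * Size).
#include <cstdint>

static bool fitsSignedScaledImm(int64_t Off, unsigned BW, unsigned Size) {
  unsigned Scale = 0;
  while ((1u << Scale) < Size)
    ++Scale;                           // Log2_32(Size); Size is a power of two
  int64_t Range = 1LL << (BW - 1);
  return (Off & (Size - 1)) == 0 && Off >= -(Range << Scale) &&
         Off < (Range << Scale);
}
// e.g. BW=7, Size=8 (64-bit LDP/STP): multiples of 8 in [-512, 504].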

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address.  The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                              SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0) {
      const GlobalValue *GV = GAN->getGlobal();
      unsigned Alignment = GV->getAlignment();
      Type *Ty = GV->getValueType();
      if (Alignment == 0 && Ty->isSized())
        Alignment = DL.getABITypeAlignment(Ty);

      if (Alignment >= Size)
        return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address.  This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode.  The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}
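
// How the scaled/unscaled pair above splits the offsets, shown for an 8-byte
// access (sketch with hypothetical helper names):
#include <cstdint>

static bool scaledTakes8(int64_t Off) {   // ldr x0, [x1, #Off]
  return (Off & 7) == 0 && Off >= 0 && Off < (0x1000 << 3);
}
static bool unscaledTakes8(int64_t Off) { // ldur x0, [x1, #Off]
  return !scaledTakes8(Off) && Off >= -256 && Off < 256;
}
// scaledTakes8(8)    -> true    unscaledTakes8(-8) -> true
// unscaledTakes8(12) -> true    (misaligned for the scale)
// Off = 32768        -> neither; the address is materialized with an ADD.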

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
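
// The same heuristic restated standalone (sketch, hypothetical name): prefer
// ADD for imm12 values and for imm12<<12 values, unless one MOVZ (all set
// bits inside a single 16-bit chunk) would be cheaper.
#include <cstdint>

static bool preferADDSketch(int64_t Imm) {
  uint64_t U = (uint64_t)Imm;
  if ((U & 0xfffffffffffff000ULL) == 0)       // fits ADD #imm12
    return true;
  if ((U & 0xffffffffff000fffULL) == 0)       // fits ADD #imm12, LSL #12
    return (U & 0xffffffffff00ffffULL) != 0   // not a MOVZ #k, LSL #16
        && (U & 0xffffffffffff0fffULL) != 0;  // not a MOVZ #k (bits 12..15)
  return false;
}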

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case selection falls back to [BaseReg + 0] and generates
  // instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip immediates that can be selected in the load/store addressing
    // mode, and immediates that can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
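
// [Illustrative sketch, not in the original source: for an i64 pre-indexed
// load, i.e. a load whose addressing mode is ISD::PRE_INC with offset 16,
// the code above selects AArch64::LDRXpre, which assembles to
//   ldr x0, [x1, #16]!
// and yields both the loaded value and the updated base register.]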

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}

namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}
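
// [Illustrative note, not in the original source: widening and narrowing are
// pure subregister operations. A v8i8 value, for example, is widened to
// v16i8 by inserting it into the dsub subregister of an IMPLICIT_DEF, and
// narrowed back by extracting dsub; no data movement is generated.]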

void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64),         // Lane Number
                   N->getOperand(NumVecs + 2),                  // Base register
                   N->getOperand(NumVecs + 3),                  // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that
  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    Opd0 = Op0->getOperand(0);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
                                 : countTrailingOnes<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead of
    // the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
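
// [Illustrative example, not in the original source: for
//   (and (srl x, 3), 0x1f)
// the helper above reports LSB = 3 and MSB = 3 + 5 - 1 = 7, so the node is
// selected as UBFM x, #3, #7, i.e. the alias UBFX x, #3, #5.]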

static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}
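
// [Illustrative example, not in the original source: for
//   (sign_extend_inreg (srl x, 2), i8)
// Width = 8 and ShiftImm = 2, so Immr = 2 and Imms = 9, i.e. the alias
// SBFX x, #2, #8.]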

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern which basically extracts several
  // continuous bits from the source value and places it from the LSB of the
  // destination value; all other bits of the destination value are set to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
  if (BitWide && isMask_64(AndMask >> SrlImm)) {
    if (N->getValueType(0) == MVT::i32)
      Opc = AArch64::UBFMWri;
    else
      Opc = AArch64::UBFMXri;

    LSB = SrlImm;
    MSB = BitWide + SrlImm - 1;
    return true;
  }

  return false;
}
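
// [Illustrative example, not in the original source: for
//   (srl (and x, 0xff0), 4)
// AndMask >> SrlImm = 0xff is a mask, so BitWide = 8, LSB = 4 and
// MSB = 8 + 4 - 1 = 11, selecting UBFM x, #4, #11 (UBFX x, #4, #8).]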

static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate 64bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}
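
// [Illustrative example, not in the original source: for the i32 shift pair
//   (srl (shl x, 8), 12)
// Immr = 12 - 8 = 4 and Imms = 32 - 8 - 1 = 23, so the pair collapses into a
// single UBFMWri x, #4, #23, i.e. UBFX x, #4, #20.]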

bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  EVT VT = N->getValueType(0);
  EVT NarrowVT = N->getOperand(0)->getValueType(0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(0);
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
    return true;
  }
  // Unreachable
  return false;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                          MVT::i32, SDValue(BFM, 0), SubReg));
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
}

// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for useful bits of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7, and they live through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}

static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, ResultUsefulBits, Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call the getUsefulBits
    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (UserNode->getOperand(1) != Orig)
      return;
    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= 6)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bit is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  for (SDNode *Node : Op.getNode()->uses()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}
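
// [Illustrative note, not in the original source: for
//   y = and x, 0x7
//   z = srl y, 2
// the walk above first narrows the useful bits of x to 0x7 at the AND; the
// shift then leaves only bit 2 useful, so UsefulBits for x ends up as 0x4,
// matching the example in the comment block before getUsefulBits.]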

/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  }

  return SDValue(ShiftNode, 0);
}
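
// [Illustrative note, not in the original source: both directions are
// emitted as UBFM because LSL and LSR are UBFM aliases; on a 32-bit value,
//   lsl wD, wN, #4  ==  ubfm wD, wN, #28, #27
//   lsr wD, wN, #4  ==  ubfm wD, wN, #4,  #31
// which is exactly what the two getMachineNode calls above construct.]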

/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)"?
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern,
                                    SDValue &Src, int &ShiftAmount,
                                    int &MaskWidth) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();

  // Discard a constant AND mask if present. It's safe because the node will
  // already have been factored into the computeKnownBits calculation above.
  uint64_t AndImm;
  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
    assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
    Op = Op.getOperand(0);
  }

  // Don't match if the SHL has more than one use, since then we'll end up
  // generating SHL+UBFIZ instead of just keeping SHL+AND.
  if (!BiggerPattern && !Op.hasOneUse())
    return false;

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    return false;
  Op = Op.getOperand(0);

  if (!isShiftedMask_64(NonZeroBits))
    return false;

  ShiftAmount = countTrailingZeros(NonZeroBits);
  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount.  BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
    return false;
  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);

  return true;
}
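
// [Illustrative example, not in the original source: for the i32 tree
//   (and (shl x, 4), 0xff0)
// the non-zero bits are 0xff0, so ShiftAmount = 4 and MaskWidth = 8; since
// ShlImm equals ShiftAmount, no extra shift is needed and Src is x itself.]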

static bool isShiftedMask(uint64_t Mask, EVT VT) {
  assert(VT == MVT::i32 || VT == MVT::i64);
  if (VT == MVT::i32)
    return isShiftedMask_32(Mask);
  return isShiftedMask_64(Mask);
}

// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits.
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
    return false;

  // Skip this transformation if the ORR immediate can be encoded in the ORR.
  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
  // performance neutral.
  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
    return false;

  // Compute the Known Zero for the AND as this allows us to catch more general
  // cases than just looking for AND with imm.
  KnownBits Known = CurDAG->computeKnownBits(And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  int LSB = countTrailingOnes(NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR.  A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with an ORR-immediate with the zero register.
    unsigned OrChunks = 0, BFIChunks = 0;
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
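
// [Illustrative example, not in the original source: for the i32 node
//   (or (and x, 0xffff00ff), 0x4400)
// bits 8..15 are known zero, so LSB = 8 and Width = 8; 0x44 is materialized
// with MOVi32imm and inserted as
//   bfi wX, wImm, #8, #8       ; BFMWri with ImmR = 24, ImmS = 7.]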
2259
2260
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2261
19.2k
                                      SelectionDAG *CurDAG) {
2262
19.2k
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2263
19.2k
2264
19.2k
  EVT VT = N->getValueType(0);
2265
19.2k
  if (VT != MVT::i32 && 
VT != MVT::i647.39k
)
2266
1.51k
    return false;
2267
17.7k
2268
17.7k
  unsigned BitWidth = VT.getSizeInBits();
2269
17.7k
2270
17.7k
  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2271
17.7k
  // have the expected shape. Try to undo that.
2272
17.7k
2273
17.7k
  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2274
17.7k
  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2275
17.7k
2276
17.7k
  // Given a OR operation, check if we have the following pattern
2277
17.7k
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2278
17.7k
  //                       isBitfieldExtractOp)
2279
17.7k
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
2280
17.7k
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
2281
17.7k
  // f = d | c
2282
17.7k
  // if yes, replace the OR instruction with:
2283
17.7k
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2284
17.7k
2285
17.7k
  // OR is commutative, check all combinations of operand order and values of
2286
17.7k
  // BiggerPattern, i.e.
2287
17.7k
  //     Opd0, Opd1, BiggerPattern=false
2288
17.7k
  //     Opd1, Opd0, BiggerPattern=false
2289
17.7k
  //     Opd0, Opd1, BiggerPattern=true
2290
17.7k
  //     Opd1, Opd0, BiggerPattern=true
2291
17.7k
  // Several of these combinations may match, so check with BiggerPattern=false
2292
17.7k
  // first since that will produce better results by matching more instructions
2293
17.7k
  // and/or inserting fewer extra instructions.
2294
84.3k
  for (int I = 0; I < 4; 
++I66.6k
) {
2295
68.0k
2296
68.0k
    SDValue Dst, Src;
2297
68.0k
    unsigned ImmR, ImmS;
2298
68.0k
    bool BiggerPattern = I / 2;
2299
68.0k
    SDValue OrOpd0Val = N->getOperand(I % 2);
2300
68.0k
    SDNode *OrOpd0 = OrOpd0Val.getNode();
2301
68.0k
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2302
68.0k
    SDNode *OrOpd1 = OrOpd1Val.getNode();
2303
68.0k
2304
68.0k
    unsigned BFXOpc;
2305
68.0k
    int DstLSB, Width;
2306
68.0k
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2307
68.0k
                            NumberOfIgnoredLowBits, BiggerPattern)) {
2308
1.12k
      // Check that the returned opcode is compatible with the pattern,
2309
1.12k
      // i.e., same type and zero extended (U and not S)
2310
1.12k
      if ((BFXOpc != AArch64::UBFMXri && 
VT == MVT::i64782
) ||
2311
1.12k
          
(1.12k
BFXOpc != AArch64::UBFMWri1.12k
&&
VT == MVT::i32526
))
2312
392
        continue;
2313
731
2314
731
      // Compute the width of the bitfield insertion
2315
731
      DstLSB = 0;
2316
731
      Width = ImmS - ImmR + 1;
2317
731
      // FIXME: This constraint is to catch bitfield insertion we may
2318
731
      // want to widen the pattern if we want to grab general bitfied
2319
731
      // move case
2320
731
      if (Width <= 0)
2321
0
        continue;
2322
66.8k
2323
66.8k
      // If the mask on the insertee is correct, we have a BFXIL operation. We
2324
66.8k
      // can share the ImmR and ImmS values from the already-computed UBFM.
2325
66.8k
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2326
66.8k
                                       BiggerPattern,
2327
66.8k
                                       Src, DstLSB, Width)) {
2328
1.48k
      ImmR = (BitWidth - DstLSB) % BitWidth;
2329
1.48k
      ImmS = Width - 1;
2330
1.48k
    } else
2331
65.4k
      continue;
2332
2.22k
2333
2.22k
    // Check the second part of the pattern
2334
2.22k
    EVT VT = OrOpd1Val.getValueType();
2335
2.22k
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2336
2.22k
2337
2.22k
    // Compute the Known Zero for the candidate of the first operand.
2338
2.22k
    // This allows to catch more general case than just looking for
2339
2.22k
    // AND with imm. Indeed, simplify-demanded-bits may have removed
2340
2.22k
    // the AND instruction because it proves it was useless.
2341
2.22k
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2342
2.22k
2343
2.22k
    // Check if there is enough room for the second operand to appear
2344
2.22k
    // in the first one
2345
2.22k
    APInt BitsToBeInserted =
2346
2.22k
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2347
2.22k
2348
2.22k
    if ((BitsToBeInserted & ~Known.Zero) != 0)
2349
865
      continue;
2350
1.35k
2351
1.35k
    // Set the first operand
2352
1.35k
    uint64_t Imm;
2353
1.35k
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2354
1.35k
        
isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)352
)
2355
286
      // In that case, we can eliminate the AND
2356
286
      Dst = OrOpd1->getOperand(0);
2357
1.06k
    else
2358
1.06k
      // Maybe the AND has been removed by simplify-demanded-bits
2359
1.06k
      // or is useful because it discards more bits
2360
1.06k
      Dst = OrOpd1Val;
2361
1.35k
2362
1.35k
    // both parts match
2363
1.35k
    SDLoc DL(N);
2364
1.35k
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2365
1.35k
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2366
1.35k
    unsigned Opc = (VT == MVT::i32) ? 
AArch64::BFMWri919
:
AArch64::BFMXri436
;
2367
1.35k
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2368
1.35k
    return true;
2369
1.35k
  }
2370
17.7k
2371
17.7k
  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2372
17.7k
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2373
17.7k
  // mask (e.g., 0x000ffff0).
2374
17.7k
  uint64_t Mask0Imm, Mask1Imm;
2375
16.3k
  SDValue And0 = N->getOperand(0);
2376
16.3k
  SDValue And1 = N->getOperand(1);
2377
16.3k
  if (And0.hasOneUse() && 
And1.hasOneUse()11.2k
&&
2378
16.3k
      
isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm)10.9k
&&
2379
16.3k
      
isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm)348
&&
2380
16.3k
      
APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm)210
&&
2381
16.3k
      
(193
isShiftedMask(Mask0Imm, VT)193
||
isShiftedMask(Mask1Imm, VT)182
)) {
2382
186
2383
186
    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2384
186
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2385
186
    // bits to be inserted.
2386
186
    if (isShiftedMask(Mask0Imm, VT)) {
2387
11
      std::swap(And0, And1);
2388
11
      std::swap(Mask0Imm, Mask1Imm);
2389
11
    }
2390
186
2391
186
    SDValue Src = And1->getOperand(0);
2392
186
    SDValue Dst = And0->getOperand(0);
2393
186
    unsigned LSB = countTrailingZeros(Mask1Imm);
2394
186
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2395
186
2396
186
    // The BFXIL inserts the low-order bits from a source register, so right
2397
186
    // shift the needed bits into place.
2398
186
    SDLoc DL(N);
2399
186
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2400
186
    SDNode *LSR = CurDAG->getMachineNode(
2401
186
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2402
186
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2403
186
2404
186
    // BFXIL is an alias of BFM, so translate to BFM operands.
2405
186
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
2406
186
    unsigned ImmS = Width - 1;
2407
186
2408
186
    // Create the BFXIL instruction.
2409
186
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
2410
186
                     CurDAG->getTargetConstant(ImmR, DL, VT),
2411
186
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
2412
186
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2413
186
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2414
186
    return true;
2415
186
  }
2416
16.1k
2417
16.1k
  return false;
2418
16.1k
}
2419
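Aside: the ImmR/ImmS arithmetic used by the bitfield-insert matching above is easy to misread. A minimal standalone sketch follows (plain C++, not part of this file; encodeBFXIL is a hypothetical helper) of how a BFXIL-style insert of Width low bits at position LSB maps onto BFM's rotate/mask operands:

#include <cassert>

struct BFMOperands { unsigned ImmR, ImmS; };

// ImmR is the rotate-right amount that positions the field (taken modulo
// the register width so LSB == 0 works); ImmS is the index of the most
// significant source bit being moved.
BFMOperands encodeBFXIL(unsigned BitWidth, unsigned LSB, unsigned Width) {
  assert(Width >= 1 && LSB + Width <= BitWidth && "ill-formed bitfield");
  return {(BitWidth - LSB) % BitWidth, Width - 1};
}

int main() {
  // A 16-bit field at bit 4 of a 32-bit register (the shifted mask
  // 0x000ffff0 from the comment above) encodes as BFM ..., #28, #15.
  BFMOperands Ops = encodeBFXIL(32, 4, 16);
  assert(Ops.ImmR == 28 && Ops.ImmS == 15);
  return 0;
}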
2420
19.2k
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2421
19.2k
  if (N->getOpcode() != ISD::OR)
2422
0
    return false;
2423
19.2k
2424
19.2k
  APInt NUsefulBits;
2425
19.2k
  getUsefulBits(SDValue(N, 0), NUsefulBits);
2426
19.2k
2427
19.2k
  // If none of the bits are useful, just return UNDEF.
2428
19.2k
  if (!NUsefulBits) {
2429
1
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2430
1
    return true;
2431
1
  }
2432
19.2k
2433
19.2k
  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2434
1.54k
    return true;
2435
17.6k
2436
17.6k
  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2437
17.6k
}
2438
2439
/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2440
/// equivalent of a left shift by a constant amount followed by an and masking
2441
/// out a contiguous set of bits.
2442
48.3k
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2443
48.3k
  if (N->getOpcode() != ISD::AND)
2444
11.4k
    return false;
2445
36.8k
2446
36.8k
  EVT VT = N->getValueType(0);
2447
36.8k
  if (VT != MVT::i32 && VT != MVT::i64)
2448
7.83k
    return false;
2449
29.0k
2450
29.0k
  SDValue Op0;
2451
29.0k
  int DstLSB, Width;
2452
29.0k
  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2453
29.0k
                               Op0, DstLSB, Width))
2454
28.9k
    return false;
2455
82
2456
82
  // ImmR is the rotate right amount.
2457
82
  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2458
82
  // ImmS is the most significant bit of the source to be moved.
2459
82
  unsigned ImmS = Width - 1;
2460
82
2461
82
  SDLoc DL(N);
2462
82
  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2463
82
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
2464
82
  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2465
82
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2466
82
  return true;
2467
82
}
2468
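For a concrete feel of the equivalence tryBitfieldInsertInZeroOp relies on, here is a small self-contained check (plain C++; ubfiz64 merely models the instruction and is not a helper from this file) that the UBFIZ form reproduces a left shift followed by a contiguous mask:

#include <cassert>
#include <cstdint>

// Models UBFM's insert-into-zero form: the Width low bits of X placed at
// DstLSB, everything else cleared.
static uint64_t ubfiz64(uint64_t X, unsigned DstLSB, unsigned Width) {
  assert(Width >= 1 && DstLSB + Width <= 64 && "ill-formed bitfield");
  uint64_t Field = X & ((Width == 64) ? ~0ULL : ((1ULL << Width) - 1));
  return Field << DstLSB;
}

int main() {
  // (x << 8) & 0x00ffff00 keeps a 16-bit field positioned at bit 8.
  uint64_t X = 0xdeadbeefULL;
  assert(ubfiz64(X, 8, 16) == ((X << 8) & 0x00ffff00ULL));
  return 0;
}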
2469
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2470
/// variable shift/rotate instructions.
2471
69.3k
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2472
69.3k
  EVT VT = N->getValueType(0);
2473
69.3k
2474
69.3k
  unsigned Opc;
2475
69.3k
  switch (N->getOpcode()) {
2476
69.3k
  case ISD::ROTR:
2477
372
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2478
372
    break;
2479
69.3k
  case ISD::SHL:
2480
20.6k
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2481
20.6k
    break;
2482
69.3k
  case ISD::SRL:
2483
7.16k
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2484
7.16k
    break;
2485
69.3k
  case ISD::SRA:
2486
2.67k
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2487
2.67k
    break;
2488
69.3k
  default:
2489
38.4k
    return false;
2490
30.9k
  }
2491
30.9k
2492
30.9k
  uint64_t Size;
2493
30.9k
  uint64_t Bits;
2494
30.9k
  if (VT == MVT::i32) {
2495
14.1k
    Bits = 5;
2496
14.1k
    Size = 32;
2497
16.7k
  } else if (VT == MVT::i64) {
2498
16.7k
    Bits = 6;
2499
16.7k
    Size = 64;
2500
16.7k
  } else
2501
0
    return false;
2502
30.9k
2503
30.9k
  SDValue ShiftAmt = N->getOperand(1);
2504
30.9k
  SDLoc DL(N);
2505
30.9k
  SDValue NewShiftAmt;
2506
30.9k
2507
30.9k
  // Skip over an extend of the shift amount.
2508
30.9k
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2509
30.9k
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2510
4.52k
    ShiftAmt = ShiftAmt->getOperand(0);
2511
30.9k
2512
30.9k
  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2513
2.36k
    SDValue Add0 = ShiftAmt->getOperand(0);
2514
2.36k
    SDValue Add1 = ShiftAmt->getOperand(1);
2515
2.36k
    uint64_t Add0Imm;
2516
2.36k
    uint64_t Add1Imm;
2517
2.36k
    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2518
2.36k
    // to avoid the ADD/SUB.
2519
2.36k
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2520
47
      NewShiftAmt = Add0;
2521
2.31k
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2522
2.31k
    // generate a NEG instead of a SUB of a constant.
2523
2.31k
    else if (ShiftAmt->getOpcode() == ISD::SUB &&
2524
2.31k
             isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2525
2.31k
             (Add0Imm % Size == 0)) {
2526
209
      unsigned NegOpc;
2527
209
      unsigned ZeroReg;
2528
209
      EVT SubVT = ShiftAmt->getValueType(0);
2529
209
      if (SubVT == MVT::i32) {
2530
114
        NegOpc = AArch64::SUBWrr;
2531
114
        ZeroReg = AArch64::WZR;
2532
114
      } else {
2533
95
        assert(SubVT == MVT::i64);
2534
95
        NegOpc = AArch64::SUBXrr;
2535
95
        ZeroReg = AArch64::XZR;
2536
95
      }
2537
209
      SDValue Zero =
2538
209
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2539
209
      MachineSDNode *Neg =
2540
209
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2541
209
      NewShiftAmt = SDValue(Neg, 0);
2542
209
    } else
2543
2.10k
      return false;
2544
28.5k
  } else {
2545
28.5k
    // If the shift amount is masked with an AND, check that the mask covers the
2546
28.5k
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
2547
28.5k
    // the AND.
2548
28.5k
    uint64_t MaskImm;
2549
28.5k
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2550
27.7k
      return false;
2551
753
2552
753
    if (countTrailingOnes(MaskImm) < Bits)
2553
526
      return false;
2554
227
2555
227
    NewShiftAmt = ShiftAmt->getOperand(0);
2556
227
  }
2557
30.9k
2558
30.9k
  // Narrow/widen the shift amount to match the size of the shift operation.
2559
30.9k
  
  if (VT == MVT::i32)
2560
185
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2561
298
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2562
41
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2563
41
    MachineSDNode *Ext = CurDAG->getMachineNode(
2564
41
        AArch64::SUBREG_TO_REG, DL, VT,
2565
41
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2566
41
    NewShiftAmt = SDValue(Ext, 0);
2567
41
  }
2568
483
2569
483
  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2570
483
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2571
483
  return true;
2572
30.9k
}
2573
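The rewrite above is sound because AArch64's variable shifts already reduce the amount modulo the register width. A quick standalone check (plain C++; shl_model mirrors what LSLVXr computes and is illustrative only):

#include <cassert>
#include <cstdint>

// LSLVXr semantics: hardware uses only the low six bits of the amount.
static uint64_t shl_model(uint64_t X, uint64_t Amt) {
  return X << (Amt & 63);
}

int main() {
  uint64_t X = 0x123;
  // An AND whose mask has countTrailingOnes >= 6 cannot change the result,
  // which is exactly the condition checked before skipping the AND above.
  for (uint64_t Amt = 0; Amt < 256; ++Amt)
    assert(shl_model(X, Amt & 63) == shl_model(X, Amt));
  return 0;
}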
2574
bool
2575
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2576
831
                                              unsigned RegWidth) {
2577
831
  APFloat FVal(0.0);
2578
831
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2579
558
    FVal = CN->getValueAPF();
2580
273
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2581
92
    // Some otherwise illegal constants are allowed in this case.
2582
92
    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2583
92
        
!isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))38
)
2584
72
      return false;
2585
20
2586
20
    ConstantPoolSDNode *CN =
2587
20
        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2588
20
    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2589
20
  } else
2590
181
    return false;
2591
578
2592
578
  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2593
578
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2594
578
  // x-register.
2595
578
  //
2596
578
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2597
578
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2598
578
  // integers.
2599
578
  bool IsExact;
2600
578
2601
578
  // fbits is between 1 and 64 in the worst-case, which means the fmul
2602
578
  // could have 2^64 as an actual operand. Need 65 bits of precision.
2603
578
  APSInt IntVal(65, true);
2604
578
  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2605
578
2606
578
  // N.b. isPowerOf2 also checks for > 0.
2607
578
  if (!IsExact || !IntVal.isPowerOf2()) return false;
2608
48
  unsigned FBits = IntVal.logBase2();
2609
48
2610
48
  // Checks above should have guaranteed that we haven't lost information in
2611
48
  // finding FBits, but it must still be in range.
2612
48
  if (FBits == 0 || FBits > RegWidth) return false;
2613
48
2614
48
  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2615
48
  return true;
2616
48
}
2617
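To make the fbits detection concrete, here is a simplified standalone model (plain C++ doubles instead of APFloat/APSInt; detectFBits is illustrative only):

#include <cassert>
#include <cmath>

// Returns fbits if Val == 2^fbits with 1 <= fbits <= RegWidth, else -1;
// this is the condition under which (fp_to_[su]int (fmul Val, C)) can be
// selected as a single fixed-point FCVT[SU].
static int detectFBits(double Val, unsigned RegWidth) {
  if (Val <= 0.0)
    return -1;
  int Exp;
  double Mant = std::frexp(Val, &Exp); // Val == Mant * 2^Exp, Mant in [0.5, 1)
  if (Mant != 0.5)                     // reject non-powers-of-two
    return -1;
  int FBits = Exp - 1;                 // so Val == 2^FBits
  return (FBits >= 1 && FBits <= (int)RegWidth) ? FBits : -1;
}

int main() {
  assert(detectFBits(65536.0, 32) == 16); // fmul by 2^16 -> fcvtzs ..., #16
  assert(detectFBits(3.0, 32) == -1);     // not a power of two
  assert(detectFBits(1.0, 32) == -1);     // fbits must be at least 1
  return 0;
}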
2618
// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2619
// integer values of its fields, and combines them into a single value to be
2620
// used in the MRS/MSR instruction.
2621
36
static int getIntOperandFromRegisterString(StringRef RegString) {
2622
36
  SmallVector<StringRef, 5> Fields;
2623
36
  RegString.split(Fields, ':');
2624
36
2625
36
  if (Fields.size() == 1)
2626
14
    return -1;
2627
22
2628
22
  assert(Fields.size() == 5
2629
22
            && "Invalid number of fields in read register string");
2630
22
2631
22
  SmallVector<int, 5> Ops;
2632
22
  bool AllIntFields = true;
2633
22
2634
110
  for (StringRef Field : Fields) {
2635
110
    unsigned IntField;
2636
110
    AllIntFields &= !Field.getAsInteger(10, IntField);
2637
110
    Ops.push_back(IntField);
2638
110
  }
2639
22
2640
22
  assert(AllIntFields &&
2641
22
          "Unexpected non-integer value in special register string.");
2642
22
2643
22
  // Need to combine the integer fields of the string into a single value
2644
22
  // based on the bit encoding of MRS/MSR instruction.
2645
22
  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2646
22
         (Ops[3] << 3) | (Ops[4]);
2647
22
}
2648
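The shifts in the function above imply the packed layout o0[15:14] op1[13:11] CRn[10:7] CRm[6:3] op2[2:0]. A tiny standalone mirror (plain C++; encodeSysReg is illustrative and the field values below are arbitrary):

#include <cassert>

// Same packing as getIntOperandFromRegisterString: five colon-separated
// integer fields combined into one MRS/MSR operand.
int encodeSysReg(int O0, int Op1, int CRn, int CRm, int Op2) {
  return (O0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

int main() {
  // An arbitrary register string "1:3:13:0:2" packs as follows.
  assert(encodeSysReg(1, 3, 13, 0, 2) ==
         ((1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2));
  return 0;
}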
2649
// Lower the read_register intrinsic to an MRS instruction node if the special
2650
// register string argument is either of the form detailed in the ACLE (the
2651
// form described in getIntOperandFromRegisterString) or is a named register
2652
// known by the MRS SysReg mapper.
2653
18
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2654
18
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2655
18
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2656
18
  SDLoc DL(N);
2657
18
2658
18
  int Reg = getIntOperandFromRegisterString(RegString->getString());
2659
18
  if (Reg != -1) {
2660
11
    ReplaceNode(N, CurDAG->getMachineNode(
2661
11
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2662
11
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2663
11
                       N->getOperand(0)));
2664
11
    return true;
2665
11
  }
2666
7
2667
7
  // Use the sysreg mapper to map the remaining possible strings to the
2668
7
  // value for the register to be used for the instruction operand.
2669
7
  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2670
7
  if (TheReg && TheReg->Readable &&
2671
7
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
2672
1
    Reg = TheReg->Encoding;
2673
6
  else
2674
6
    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2675
7
2676
7
  if (Reg != -1) {
2677
1
    ReplaceNode(N, CurDAG->getMachineNode(
2678
1
                       AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2679
1
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2680
1
                       N->getOperand(0)));
2681
1
    return true;
2682
1
  }
2683
6
2684
6
  if (RegString->getString() == "pc") {
2685
1
    ReplaceNode(N, CurDAG->getMachineNode(
2686
1
                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2687
1
                       CurDAG->getTargetConstant(0, DL, MVT::i32),
2688
1
                       N->getOperand(0)));
2689
1
    return true;
2690
1
  }
2691
5
2692
5
  return false;
2693
5
}
2694
2695
// Lower the write_register intrinsic to an MSR instruction node if the special
2696
// register string argument is either of the form detailed in the ACLE (the
2697
// form described in getIntOperandFromRegisterString) or is a named register
2698
// known by the MSR SysReg mapper.
2699
18
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2700
18
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2701
18
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2702
18
  SDLoc DL(N);
2703
18
2704
18
  int Reg = getIntOperandFromRegisterString(RegString->getString());
2705
18
  if (Reg != -1) {
2706
11
    ReplaceNode(
2707
11
        N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2708
11
                                  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2709
11
                                  N->getOperand(2), N->getOperand(0)));
2710
11
    return true;
2711
11
  }
2712
7
2713
7
  // Check if the register was one of those allowed as the pstatefield value in
2714
7
  // the MSR (immediate) instruction. To accept the values allowed in the
2715
7
  // pstatefield for the MSR (immediate) instruction, we also require that an
2716
7
  // immediate value has been provided as an argument; we know this is
2718
7
  // the case, as it has been ensured by semantic checking.
2718
7
  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2719
7
  if (PMapper) {
2720
1
    assert (isa<ConstantSDNode>(N->getOperand(2))
2721
1
              && "Expected a constant integer expression.");
2722
1
    unsigned Reg = PMapper->Encoding;
2723
1
    uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2724
1
    unsigned State;
2725
1
    if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2726
0
      assert(Immed < 2 && "Bad imm");
2727
0
      State = AArch64::MSRpstateImm1;
2728
1
    } else {
2729
1
      assert(Immed < 16 && "Bad imm");
2730
1
      State = AArch64::MSRpstateImm4;
2731
1
    }
2732
1
    ReplaceNode(N, CurDAG->getMachineNode(
2733
1
                       State, DL, MVT::Other,
2734
1
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2735
1
                       CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2736
1
                       N->getOperand(0)));
2737
1
    return true;
2738
1
  }
2739
6
2740
6
  // Use the sysreg mapper to attempt to map the remaining possible strings
2741
6
  // to the value for the register to be used for the MSR (register)
2742
6
  // instruction operand.
2743
6
  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2744
6
  if (TheReg && TheReg->Writeable &&
2745
6
      TheReg->haveFeatures(Subtarget->getFeatureBits()))
2746
1
    Reg = TheReg->Encoding;
2747
5
  else
2748
5
    Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2749
6
  if (Reg != -1) {
2750
1
    ReplaceNode(N, CurDAG->getMachineNode(
2751
1
                       AArch64::MSR, DL, MVT::Other,
2752
1
                       CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2753
1
                       N->getOperand(2), N->getOperand(0)));
2754
1
    return true;
2755
1
  }
2756
5
2757
5
  return false;
2758
5
}
2759
2760
/// We've got special pseudo-instructions for these compare-and-swap nodes.
2761
76
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2762
76
  unsigned Opcode;
2763
76
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2764
76
2765
76
  // Leave IR for LSE if subtarget supports it.
2766
76
  if (Subtarget->hasLSE()) return false;
2767
4
2768
4
  if (MemTy == MVT::i8)
2769
1
    Opcode = AArch64::CMP_SWAP_8;
2770
3
  else if (MemTy == MVT::i16)
2771
1
    Opcode = AArch64::CMP_SWAP_16;
2772
2
  else if (MemTy == MVT::i32)
2773
1
    Opcode = AArch64::CMP_SWAP_32;
2774
1
  else if (MemTy == MVT::i64)
2775
1
    Opcode = AArch64::CMP_SWAP_64;
2776
1
  else
2777
1
    
    llvm_unreachable("Unknown AtomicCmpSwap type");
2778
4
2779
4
  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2780
4
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2781
4
                   N->getOperand(0)};
2782
4
  SDNode *CmpSwap = CurDAG->getMachineNode(
2783
4
      Opcode, SDLoc(N),
2784
4
      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2785
4
2786
4
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2787
4
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2788
4
2789
4
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2790
4
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2791
4
  CurDAG->RemoveDeadNode(N);
2792
4
2793
4
  return true;
2794
4
}
2795
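For orientation, the CMP_SWAP_* pseudos are what give ordinary C++ compare-exchange a lock-free lowering on cores without LSE atomics; the snippet below (standard C++, illustrative) compiles to exactly the kind of exclusive load/store retry loop those pseudos later expand to:

#include <atomic>
#include <cassert>

int main() {
  std::atomic<int> V{42};
  int Expected = 42;
  // Without LSE this becomes a ldaxr/stlxr retry loop on AArch64, which is
  // what the CMP_SWAP_32 pseudo selected above expands to.
  bool OK = V.compare_exchange_strong(Expected, 7);
  assert(OK && V.load() == 7);
  return 0;
}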
2796
9
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
2797
9
  // tagp(FrameIndex, IRGstack, tag_offset):
2798
9
  // since the offset between FrameIndex and IRGstack is a compile-time
2799
9
  // constant, this can be lowered to a single ADDG instruction.
2800
9
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
2801
2
    return false;
2802
2
  }
2803
7
2804
7
  SDValue IRG_SP = N->getOperand(2);
2805
7
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
2806
7
      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
2807
6
          Intrinsic::aarch64_irg_sp) {
2808
1
    return false;
2809
1
  }
2810
6
2811
6
  const TargetLowering *TLI = getTargetLowering();
2812
6
  SDLoc DL(N);
2813
6
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
2814
6
  SDValue FiOp = CurDAG->getTargetFrameIndex(
2815
6
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2816
6
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2817
6
2818
6
  SDNode *Out = CurDAG->getMachineNode(
2819
6
      AArch64::TAGPstack, DL, MVT::i64,
2820
6
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
2821
6
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2822
6
  ReplaceNode(N, Out);
2823
6
  return true;
2824
6
}
2825
2826
9
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
2827
9
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
2828
9
         "llvm.aarch64.tagp third argument must be an immediate");
2829
9
  if (trySelectStackSlotTagP(N))
2830
6
    return;
2831
3
  // FIXME: the above applies whenever the offset between Op1 and Op2 is a
2832
3
  // compile-time constant, not just for stack allocations.
2833
3
2834
3
  // General case for unrelated pointers in Op1 and Op2.
2835
3
  SDLoc DL(N);
2836
3
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2837
3
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
2838
3
                                      {N->getOperand(1), N->getOperand(2)});
2839
3
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
2840
3
                                      {SDValue(N1, 0), N->getOperand(2)});
2841
3
  SDNode *N3 = CurDAG->getMachineNode(
2842
3
      AArch64::ADDG, DL, MVT::i64,
2843
3
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
2844
3
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2845
3
  ReplaceNode(N, N3);
2846
3
}
2847
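A rough standalone model of the MTE tag arithmetic being selected here (plain C++; addg is illustrative and deliberately ignores tag-exclusion masks, so it simplifies the architectural ADDG):

#include <cassert>
#include <cstdint>

// ADDG adds a byte offset to the address and bumps the allocation tag held
// in pointer bits [59:56], modulo 16 (exclusion masks ignored here).
static uint64_t addg(uint64_t Ptr, uint64_t UOffset, unsigned TagOffset) {
  uint64_t NewTag = (((Ptr >> 56) & 0xf) + TagOffset) & 0xf;
  uint64_t Untagged = Ptr & ~(0xfULL << 56);
  return (Untagged + UOffset) | (NewTag << 56);
}

int main() {
  uint64_t P = (0x3ULL << 56) | 0x1000; // tag 3, address 0x1000
  uint64_t Q = addg(P, 16, 2);          // offset 16, tag_offset 2
  assert(((Q >> 56) & 0xf) == 5);
  assert((Q & 0x00ffffffffffffffULL) == 0x1010);
  return 0;
}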
2848
11.3M
void AArch64DAGToDAGISel::Select(SDNode *Node) {
2849
11.3M
  // If we have a custom node, we already have selected!
2850
11.3M
  if (Node->isMachineOpcode()) {
2851
16.5k
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2852
16.5k
    Node->setNodeId(-1);
2853
16.5k
    return;
2854
16.5k
  }
2855
11.3M
2856
11.3M
  // A few custom selection cases.
2857
11.3M
  EVT VT = Node->getValueType(0);
2858
11.3M
2859
11.3M
  switch (Node->getOpcode()) {
2860
11.3M
  default:
2861
10.0M
    break;
2862
11.3M
2863
11.3M
  case ISD::ATOMIC_CMP_SWAP:
2864
76
    if (SelectCMP_SWAP(Node))
2865
4
      return;
2866
72
    break;
2867
72
2868
72
  case ISD::READ_REGISTER:
2869
18
    if (tryReadRegister(Node))
2870
13
      return;
2871
5
    break;
2872
5
2873
18
  case ISD::WRITE_REGISTER:
2874
18
    if (tryWriteRegister(Node))
2875
13
      return;
2876
5
    break;
2877
5
2878
269k
  case ISD::ADD:
2879
269k
    if (tryMLAV64LaneV128(Node))
2880
0
      return;
2881
269k
    break;
2882
269k
2883
374k
  case ISD::LOAD: {
2884
374k
    // Try to select as an indexed load. Fall through to normal processing
2885
374k
    // if we can't.
2886
374k
    if (tryIndexedLoad(Node))
2887
10.6k
      return;
2888
364k
    break;
2889
364k
  }
2890
364k
2891
364k
  case ISD::SRL:
2892
52.2k
  case ISD::AND:
2893
52.2k
  case ISD::SRA:
2894
52.2k
  case ISD::SIGN_EXTEND_INREG:
2895
52.2k
    if (tryBitfieldExtractOp(Node))
2896
3.84k
      return;
2897
48.3k
    if (tryBitfieldInsertInZeroOp(Node))
2898
82
      return;
2899
48.2k
    LLVM_FALLTHROUGH;
2900
69.3k
  case ISD::ROTR:
2901
69.3k
  case ISD::SHL:
2902
69.3k
    if (tryShiftAmountMod(Node))
2903
483
      return;
2904
68.8k
    break;
2905
68.8k
2906
68.8k
  case ISD::SIGN_EXTEND:
2907
24.4k
    if (tryBitfieldExtractOpFromSExt(Node))
2908
52
      return;
2909
24.4k
    break;
2910
24.4k
2911
24.4k
  case ISD::OR:
2912
19.2k
    if (tryBitfieldInsertOp(Node))
2913
1.54k
      return;
2914
17.6k
    break;
2915
17.6k
2916
29.3k
  case ISD::EXTRACT_VECTOR_ELT: {
2917
29.3k
    // Extracting lane zero is a special case where we can just use a plain
2918
29.3k
    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2919
29.3k
    // the rest of the compiler, especially the register allocator and copy
2920
29.3k
    // propagation, to reason about, so is preferred when it's possible to
2921
29.3k
    // use it.
2922
29.3k
    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2923
29.3k
    // Bail and use the default Select() for non-zero lanes.
2924
29.3k
    if (LaneNode->getZExtValue() != 0)
2925
21.8k
      break;
2926
7.52k
    // If the element type is not the same as the result type, likewise
2927
7.52k
    // bail and use the default Select(), as there's more to do than just
2928
7.52k
    // a cross-class COPY. This catches extracts of i8 and i16 elements
2929
7.52k
    // since they will need an explicit zext.
2930
7.52k
    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2931
3.87k
      break;
2932
3.64k
    unsigned SubReg;
2933
3.64k
    switch (Node->getOperand(0)
2934
3.64k
                .getValueType()
2935
3.64k
                .getVectorElementType()
2936
3.64k
                .getSizeInBits()) {
2937
3.64k
    default:
2938
0
      llvm_unreachable("Unexpected vector element type!");
2939
3.64k
    case 64:
2940
2.49k
      SubReg = AArch64::dsub;
2941
2.49k
      break;
2942
3.64k
    case 32:
2943
1.03k
      SubReg = AArch64::ssub;
2944
1.03k
      break;
2945
3.64k
    case 16:
2946
126
      SubReg = AArch64::hsub;
2947
126
      break;
2948
3.64k
    case 8:
2949
0
      llvm_unreachable("unexpected zext-requiring extract element!");
2950
3.64k
    }
2951
3.64k
    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2952
3.64k
                                                     Node->getOperand(0));
2953
3.64k
    LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2954
3.64k
    LLVM_DEBUG(Extract->dumpr(CurDAG));
2955
3.64k
    LLVM_DEBUG(dbgs() << "\n");
2956
3.64k
    ReplaceNode(Node, Extract.getNode());
2957
3.64k
    return;
2958
3.64k
  }
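Side note: the lane-zero shortcut works because the D/S/H registers alias the low bits of the corresponding V register. A tiny standalone mirror of the size-to-subregister switch above (plain C++; the enum is illustrative):

#include <cassert>

enum SubRegIdx { dsub, ssub, hsub };

// Element size selects the NEON subregister that overlays lane 0.
static SubRegIdx subRegForElt(unsigned EltBits) {
  switch (EltBits) {
  case 64: return dsub;
  case 32: return ssub;
  case 16: return hsub;
  }
  assert(false && "i8 lanes need an explicit zext, not a plain copy");
  return dsub;
}

int main() {
  assert(subRegForElt(32) == ssub); // f32 lane 0 is just a subregister COPY
  return 0;
}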
2959
362k
  case ISD::Constant: {
2960
362k
    // Materialize zero constants as copies from WZR/XZR.  This allows
2961
362k
    // the coalescer to propagate these into other instructions.
2962
362k
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2963
362k
    if (ConstNode->isNullValue()) {
2964
125k
      if (VT == MVT::i32) {
2965
41.9k
        SDValue New = CurDAG->getCopyFromReg(
2966
41.9k
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2967
41.9k
        ReplaceNode(Node, New.getNode());
2968
41.9k
        return;
2969
83.7k
      } else if (VT == MVT::i64) {
2970
83.7k
        SDValue New = CurDAG->getCopyFromReg(
2971
83.7k
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2972
83.7k
        ReplaceNode(Node, New.getNode());
2973
83.7k
        return;
2974
83.7k
      }
2975
236k
    }
2976
236k
    break;
2977
236k
  }
2978
236k
2979
236k
  case ISD::FrameIndex: {
2980
25.2k
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2981
25.2k
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2982
25.2k
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2983
25.2k
    const TargetLowering *TLI = getTargetLowering();
2984
25.2k
    SDValue TFI = CurDAG->getTargetFrameIndex(
2985
25.2k
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2986
25.2k
    SDLoc DL(Node);
2987
25.2k
    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2988
25.2k
                      CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2989
25.2k
    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2990
25.2k
    return;
2991
236k
  }
2992
236k
  case ISD::INTRINSIC_W_CHAIN: {
2993
48.2k
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2994
48.2k
    switch (IntNo) {
2995
48.2k
    default:
2996
47.4k
      break;
2997
48.2k
    case Intrinsic::aarch64_ldaxp:
2998
18
    case Intrinsic::aarch64_ldxp: {
2999
18
      unsigned Op =
3000
18
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3001
18
      SDValue MemAddr = Node->getOperand(2);
3002
18
      SDLoc DL(Node);
3003
18
      SDValue Chain = Node->getOperand(0);
3004
18
3005
18
      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3006
18
                                          MVT::Other, MemAddr, Chain);
3007
18
3008
18
      // Transfer memoperands.
3009
18
      MachineMemOperand *MemOp =
3010
18
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3011
18
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3012
18
      ReplaceNode(Node, Ld);
3013
18
      return;
3014
18
    }
3015
18
    case Intrinsic::aarch64_stlxp:
3016
18
    case Intrinsic::aarch64_stxp: {
3017
18
      unsigned Op =
3018
18
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3019
18
      SDLoc DL(Node);
3020
18
      SDValue Chain = Node->getOperand(0);
3021
18
      SDValue ValLo = Node->getOperand(2);
3022
18
      SDValue ValHi = Node->getOperand(3);
3023
18
      SDValue MemAddr = Node->getOperand(4);
3024
18
3025
18
      // Place arguments in the right order.
3026
18
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3027
18
3028
18
      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3029
18
      // Transfer memoperands.
3030
18
      MachineMemOperand *MemOp =
3031
18
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3032
18
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3033
18
3034
18
      ReplaceNode(Node, St);
3035
18
      return;
3036
18
    }
3037
18
    case Intrinsic::aarch64_neon_ld1x2:
3038
14
      if (VT == MVT::v8i8) {
3039
1
        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3040
1
        return;
3041
13
      } else if (VT == MVT::v16i8) {
3042
1
        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3043
1
        return;
3044
12
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3045
2
        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3046
2
        return;
3047
10
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3048
2
        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3049
2
        return;
3050
8
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3051
2
        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3052
2
        return;
3053
6
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3054
2
        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3055
2
        return;
3056
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3057
2
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3058
2
        return;
3059
2
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3060
2
        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3061
2
        return;
3062
2
      }
3063
0
      break;
3064
14
    case Intrinsic::aarch64_neon_ld1x3:
3065
14
      if (VT == MVT::v8i8) {
3066
1
        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3067
1
        return;
3068
13
      } else if (VT == MVT::v16i8) {
3069
1
        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3070
1
        return;
3071
12
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3072
2
        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3073
2
        return;
3074
10
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3075
2
        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3076
2
        return;
3077
8
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3078
2
        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3079
2
        return;
3080
6
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3081
2
        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3082
2
        return;
3083
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3084
2
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3085
2
        return;
3086
2
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3087
2
        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3088
2
        return;
3089
2
      }
3090
0
      break;
3091
14
    case Intrinsic::aarch64_neon_ld1x4:
3092
14
      if (VT == MVT::v8i8) {
3093
1
        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3094
1
        return;
3095
13
      } else if (VT == MVT::v16i8) {
3096
1
        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3097
1
        return;
3098
12
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3099
2
        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3100
2
        return;
3101
10
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3102
2
        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3103
2
        return;
3104
8
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3105
2
        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3106
2
        return;
3107
6
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3108
2
        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3109
2
        return;
3110
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3111
2
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3112
2
        return;
3113
2
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3114
2
        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3115
2
        return;
3116
2
      }
3117
0
      break;
3118
206
    case Intrinsic::aarch64_neon_ld2:
3119
206
      if (VT == MVT::v8i8) {
3120
12
        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3121
12
        return;
3122
194
      } else if (VT == MVT::v16i8) {
3123
12
        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3124
12
        return;
3125
182
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3126
32
        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3127
32
        return;
3128
150
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3129
13
        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3130
13
        return;
3131
137
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3132
37
        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3133
37
        return;
3134
100
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3135
58
        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3136
58
        return;
3137
58
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3138
6
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3139
6
        return;
3140
36
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3141
36
        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3142
36
        return;
3143
36
      }
3144
0
      break;
3145
119
    case Intrinsic::aarch64_neon_ld3:
3146
119
      if (VT == MVT::v8i8) {
3147
11
        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3148
11
        return;
3149
108
      } else if (VT == MVT::v16i8) {
3150
36
        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3151
36
        return;
3152
72
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3153
12
        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3154
12
        return;
3155
60
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3156
9
        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3157
9
        return;
3158
51
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3159
4
        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3160
4
        return;
3161
47
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3162
33
        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3163
33
        return;
3164
33
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3165
6
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3166
6
        return;
3167
8
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3168
8
        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3169
8
        return;
3170
8
      }
3171
0
      break;
3172
157
    case Intrinsic::aarch64_neon_ld4:
3173
157
      if (VT == MVT::v8i8) {
3174
10
        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3175
10
        return;
3176
147
      } else if (VT == MVT::v16i8) {
3177
38
        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3178
38
        return;
3179
109
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3180
25
        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3181
25
        return;
3182
84
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3183
9
        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3184
9
        return;
3185
75
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3186
4
        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3187
4
        return;
3188
71
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3189
52
        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3190
52
        return;
3191
52
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3192
6
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3193
6
        return;
3194
13
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3195
13
        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3196
13
        return;
3197
13
      }
3198
0
      break;
3199
44
    case Intrinsic::aarch64_neon_ld2r:
3200
44
      if (VT == MVT::v8i8) {
3201
7
        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3202
7
        return;
3203
37
      } else if (VT == MVT::v16i8) {
3204
4
        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3205
4
        return;
3206
33
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3207
8
        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3208
8
        return;
3209
25
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3210
5
        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3211
5
        return;
3212
20
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3213
6
        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3214
6
        return;
3215
14
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3216
4
        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3217
4
        return;
3218
10
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3219
5
        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3220
5
        return;
3221
5
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3222
5
        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3223
5
        return;
3224
5
      }
3225
0
      break;
3226
36
    case Intrinsic::aarch64_neon_ld3r:
3227
36
      if (VT == MVT::v8i8) {
3228
4
        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3229
4
        return;
3230
32
      } else if (VT == MVT::v16i8) {
3231
4
        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3232
4
        return;
3233
28
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3234
5
        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3235
5
        return;
3236
23
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3237
5
        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3238
5
        return;
3239
18
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3240
4
        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3241
4
        return;
3242
14
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3243
4
        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3244
4
        return;
3245
10
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3246
5
        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3247
5
        return;
3248
5
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3249
5
        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3250
5
        return;
3251
5
      }
3252
0
      break;
3253
36
    case Intrinsic::aarch64_neon_ld4r:
3254
36
      if (VT == MVT::v8i8) {
3255
4
        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3256
4
        return;
3257
32
      } else if (VT == MVT::v16i8) {
3258
4
        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3259
4
        return;
3260
28
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3261
5
        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3262
5
        return;
3263
23
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3264
5
        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3265
5
        return;
3266
18
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3267
4
        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3268
4
        return;
3269
14
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3270
4
        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3271
4
        return;
3272
10
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3273
5
        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3274
5
        return;
3275
5
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3276
5
        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3277
5
        return;
3278
5
      }
3279
0
      break;
3280
44
    case Intrinsic::aarch64_neon_ld2lane:
3281
44
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3282
10
        SelectLoadLane(Node, 2, AArch64::LD2i8);
3283
10
        return;
3284
34
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3285
34
                 VT == MVT::v8f16) {
3286
12
        SelectLoadLane(Node, 2, AArch64::LD2i16);
3287
12
        return;
3288
22
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3289
22
                 VT == MVT::v2f32) {
3290
11
        SelectLoadLane(Node, 2, AArch64::LD2i32);
3291
11
        return;
3292
11
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3293
11
                 VT == MVT::v1f64) {
3294
11
        SelectLoadLane(Node, 2, AArch64::LD2i64);
3295
11
        return;
3296
11
      }
3297
0
      break;
3298
34
    case Intrinsic::aarch64_neon_ld3lane:
3299
34
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3300
7
        SelectLoadLane(Node, 3, AArch64::LD3i8);
3301
7
        return;
3302
27
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3303
27
                 VT == MVT::v8f16) {
3304
9
        SelectLoadLane(Node, 3, AArch64::LD3i16);
3305
9
        return;
3306
18
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3307
18
                 VT == MVT::v2f32) {
3308
9
        SelectLoadLane(Node, 3, AArch64::LD3i32);
3309
9
        return;
3310
9
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3311
9
                 VT == MVT::v1f64) {
3312
9
        SelectLoadLane(Node, 3, AArch64::LD3i64);
3313
9
        return;
3314
9
      }
3315
0
      break;
3316
34
    case Intrinsic::aarch64_neon_ld4lane:
3317
34
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3318
7
        SelectLoadLane(Node, 4, AArch64::LD4i8);
3319
7
        return;
3320
27
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3321
27
                 VT == MVT::v8f16) {
3322
9
        SelectLoadLane(Node, 4, AArch64::LD4i16);
3323
9
        return;
3324
18
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3325
18
                 VT == MVT::v2f32) {
3326
9
        SelectLoadLane(Node, 4, AArch64::LD4i32);
3327
9
        return;
3328
9
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3329
11
                 VT == MVT::v1f64) {
3330
9
        SelectLoadLane(Node, 4, AArch64::LD4i64);
3331
9
        return;
3332
9
      }
3333
0
      break;
3334
47.4k
    }
3335
47.4k
  } break;
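For orientation, the ldN selections above are where the C-level NEON structured-load intrinsics end up. A short sketch (requires an AArch64 target and arm_neon.h):

#include <arm_neon.h>

// Deinterleaving load of two float32x4 vectors; reaches this selector as
// Intrinsic::aarch64_neon_ld2 with VT == MVT::v4f32, i.e. the
// AArch64::LD2Twov4s case above.
float32x4x2_t deinterleave(const float *P) {
  return vld2q_f32(P);
}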
3336
47.4k
  case ISD::INTRINSIC_WO_CHAIN: {
3337
4.04k
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3338
4.04k
    switch (IntNo) {
3339
4.04k
    default:
3340
3.76k
      break;
3341
4.04k
    case Intrinsic::aarch64_tagp:
3342
9
      SelectTagP(Node);
3343
9
      return;
3344
4.04k
    case Intrinsic::aarch64_neon_tbl2:
3345
44
      SelectTable(Node, 2,
3346
44
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3347
44
                  false);
3348
44
      return;
3349
4.04k
    case Intrinsic::aarch64_neon_tbl3:
3350
14
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3351
14
                                           : AArch64::TBLv16i8Three,
3352
14
                  false);
3353
14
      return;
3354
4.04k
    case Intrinsic::aarch64_neon_tbl4:
3355
14
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3356
14
                                           : AArch64::TBLv16i8Four,
3357
14
                  false);
3358
14
      return;
3359
4.04k
    case Intrinsic::aarch64_neon_tbx2:
3360
20
      SelectTable(Node, 2,
3361
20
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3362
20
                  true);
3363
20
      return;
3364
4.04k
    case Intrinsic::aarch64_neon_tbx3:
3365
14
      SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3366
14
                                           : AArch64::TBXv16i8Three,
3367
14
                  true);
3368
14
      return;
3369
4.04k
    case Intrinsic::aarch64_neon_tbx4:
3370
14
      SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3371
14
                                           : AArch64::TBXv16i8Four,
3372
14
                  true);
3373
14
      return;
3374
4.04k
    case Intrinsic::aarch64_neon_smull:
3375
151
    case Intrinsic::aarch64_neon_umull:
3376
151
      if (tryMULLV64LaneV128(IntNo, Node))
3377
0
        return;
3378
151
      break;
3379
3.91k
    }
3380
3.91k
    break;
3381
3.91k
  }
3382
17.2k
  case ISD::INTRINSIC_VOID: {
3383
17.2k
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3384
17.2k
    if (Node->getNumOperands() >= 3)
3385
12.8k
      VT = Node->getOperand(2)->getValueType(0);
3386
17.2k
    switch (IntNo) {
3387
17.2k
    default:
3388
4.34k
      break;
3389
17.2k
    case Intrinsic::aarch64_neon_st1x2: {
3390
27
      if (VT == MVT::v8i8) {
3391
2
        SelectStore(Node, 2, AArch64::ST1Twov8b);
3392
2
        return;
3393
25
      } else if (VT == MVT::v16i8) {
3394
2
        SelectStore(Node, 2, AArch64::ST1Twov16b);
3395
2
        return;
3396
23
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3397
3
        SelectStore(Node, 2, AArch64::ST1Twov4h);
3398
3
        return;
3399
20
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3400
3
        SelectStore(Node, 2, AArch64::ST1Twov8h);
3401
3
        return;
3402
17
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3403
4
        SelectStore(Node, 2, AArch64::ST1Twov2s);
3404
4
        return;
3405
13
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3406
5
        SelectStore(Node, 2, AArch64::ST1Twov4s);
3407
5
        return;
3408
8
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3409
4
        SelectStore(Node, 2, AArch64::ST1Twov2d);
3410
4
        return;
3411
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3412
4
        SelectStore(Node, 2, AArch64::ST1Twov1d);
3413
4
        return;
3414
4
      }
3415
0
      break;
3416
0
    }
3417
27
    case Intrinsic::aarch64_neon_st1x3: {
3418
27
      if (VT == MVT::v8i8) {
3419
2
        SelectStore(Node, 3, AArch64::ST1Threev8b);
3420
2
        return;
3421
25
      } else if (VT == MVT::v16i8) {
3422
2
        SelectStore(Node, 3, AArch64::ST1Threev16b);
3423
2
        return;
3424
23
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3425
3
        SelectStore(Node, 3, AArch64::ST1Threev4h);
3426
3
        return;
3427
20
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3428
3
        SelectStore(Node, 3, AArch64::ST1Threev8h);
3429
3
        return;
3430
17
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3431
4
        SelectStore(Node, 3, AArch64::ST1Threev2s);
3432
4
        return;
3433
13
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3434
5
        SelectStore(Node, 3, AArch64::ST1Threev4s);
3435
5
        return;
3436
8
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3437
4
        SelectStore(Node, 3, AArch64::ST1Threev2d);
3438
4
        return;
3439
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3440
4
        SelectStore(Node, 3, AArch64::ST1Threev1d);
3441
4
        return;
3442
4
      }
3443
0
      break;
3444
0
    }
3445
27
    case Intrinsic::aarch64_neon_st1x4: {
3446
27
      if (VT == MVT::v8i8) {
3447
2
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
3448
2
        return;
3449
25
      } else if (VT == MVT::v16i8) {
3450
2
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
3451
2
        return;
3452
23
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3453
3
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
3454
3
        return;
3455
20
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3456
3
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
3457
3
        return;
3458
17
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3459
4
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
3460
4
        return;
3461
13
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3462
5
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
3463
5
        return;
3464
8
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3465
4
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
3466
4
        return;
3467
4
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3468
4
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
3469
4
        return;
3470
4
      }
3471
0
      break;
3472
0
    }
3473
6.41k
    case Intrinsic::aarch64_neon_st2: {
3474
6.41k
      if (VT == MVT::v8i8) {
3475
773
        SelectStore(Node, 2, AArch64::ST2Twov8b);
3476
773
        return;
3477
5.64k
      } else if (VT == MVT::v16i8) {
3478
1.28k
        SelectStore(Node, 2, AArch64::ST2Twov16b);
3479
1.28k
        return;
3480
4.35k
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3481
766
        SelectStore(Node, 2, AArch64::ST2Twov4h);
3482
766
        return;
3483
3.58k
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3484
1.28k
        SelectStore(Node, 2, AArch64::ST2Twov8h);
3485
1.28k
        return;
3486
2.30k
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3487
766
        SelectStore(Node, 2, AArch64::ST2Twov2s);
3488
766
        return;
3489
1.53k
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3490
1.32k
        SelectStore(Node, 2, AArch64::ST2Twov4s);
3491
1.32k
        return;
3492
1.32k
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3493
209
        SelectStore(Node, 2, AArch64::ST2Twov2d);
3494
209
        return;
3495
209
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3496
7
        SelectStore(Node, 2, AArch64::ST1Twov1d);
3497
7
        return;
3498
7
      }
3499
0
      break;
3500
0
    }
3501
3.11k
    case Intrinsic::aarch64_neon_st3: {
3502
3.11k
      if (VT == MVT::v8i8) {
3503
515
        SelectStore(Node, 3, AArch64::ST3Threev8b);
3504
515
        return;
3505
2.60k
      } else if (VT == MVT::v16i8) {
3506
523
        SelectStore(Node, 3, AArch64::ST3Threev16b);
3507
523
        return;
3508
2.07k
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3509
514
        SelectStore(Node, 3, AArch64::ST3Threev4h);
3510
514
        return;
3511
1.56k
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3512
518
        SelectStore(Node, 3, AArch64::ST3Threev8h);
3513
518
        return;
3514
1.04k
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3515
513
        SelectStore(Node, 3, AArch64::ST3Threev2s);
3516
513
        return;
3517
533
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3518
519
        SelectStore(Node, 3, AArch64::ST3Threev4s);
3519
519
        return;
3520
519
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3521
8
        SelectStore(Node, 3, AArch64::ST3Threev2d);
3522
8
        return;
3523
8
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3524
6
        SelectStore(Node, 3, AArch64::ST1Threev1d);
3525
6
        return;
3526
6
      }
3527
0
      break;
3528
0
    }
3529
3.12k
    case Intrinsic::aarch64_neon_st4: {
3530
3.12k
      if (VT == MVT::v8i8) {
3531
513
        SelectStore(Node, 4, AArch64::ST4Fourv8b);
3532
513
        return;
3533
2.61k
      } else if (VT == MVT::v16i8) {
3534
519
        SelectStore(Node, 4, AArch64::ST4Fourv16b);
3535
519
        return;
3536
2.09k
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3537
514
        SelectStore(Node, 4, AArch64::ST4Fourv4h);
3538
514
        return;
3539
1.58k
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3540
518
        SelectStore(Node, 4, AArch64::ST4Fourv8h);
3541
518
        return;
3542
1.06k
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3543
513
        SelectStore(Node, 4, AArch64::ST4Fourv2s);
3544
513
        return;
3545
550
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3546
530
        SelectStore(Node, 4, AArch64::ST4Fourv4s);
3547
530
        return;
3548
530
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3549
14
        SelectStore(Node, 4, AArch64::ST4Fourv2d);
3550
14
        return;
3551
14
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3552
6
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
3553
6
        return;
3554
6
      }
3555
0
      break;
3556
0
    }
3557
40
    case Intrinsic::aarch64_neon_st2lane: {
3558
40
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3559
8
        SelectStoreLane(Node, 2, AArch64::ST2i8);
3560
8
        return;
3561
32
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3562
32
                 VT == MVT::v8f16) {
3563
10
        SelectStoreLane(Node, 2, AArch64::ST2i16);
3564
10
        return;
3565
22
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3566
22
                 VT == MVT::v2f32) {
3567
9
        SelectStoreLane(Node, 2, AArch64::ST2i32);
3568
9
        return;
3569
13
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3570
13
                 VT == MVT::v1f64) {
3571
13
        SelectStoreLane(Node, 2, AArch64::ST2i64);
3572
13
        return;
3573
13
      }
3574
0
      break;
3575
0
    }
3576
38
    case Intrinsic::aarch64_neon_st3lane: {
3577
38
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3578
8
        SelectStoreLane(Node, 3, AArch64::ST3i8);
3579
8
        return;
3580
30
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3581
30
                 VT == MVT::v8f16) {
3582
10
        SelectStoreLane(Node, 3, AArch64::ST3i16);
3583
10
        return;
3584
20
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3585
20
                 VT == MVT::v2f32) {
3586
9
        SelectStoreLane(Node, 3, AArch64::ST3i32);
3587
9
        return;
3588
11
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3589
11
                 VT == MVT::v1f64) {
3590
11
        SelectStoreLane(Node, 3, AArch64::ST3i64);
3591
11
        return;
3592
11
      }
3593
0
      break;
3594
0
    }
3595
38
    case Intrinsic::aarch64_neon_st4lane: {
3596
38
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3597
8
        SelectStoreLane(Node, 4, AArch64::ST4i8);
3598
8
        return;
3599
30
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3600
30
                 VT == MVT::v8f16) {
3601
10
        SelectStoreLane(Node, 4, AArch64::ST4i16);
3602
10
        return;
3603
20
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3604
20
                 VT == MVT::v2f32) {
3605
9
        SelectStoreLane(Node, 4, AArch64::ST4i32);
3606
9
        return;
3607
11
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3608
11
                 VT == MVT::v1f64) {
3609
11
        SelectStoreLane(Node, 4, AArch64::ST4i64);
3610
11
        return;
3611
11
      }
3612
0
      break;
3613
0
    }
3614
4.34k
    }
3615
4.34k
    break;
3616
4.34k
  }
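Likewise, the stN selections above implement the NEON interleaving stores. Sketch (AArch64 target, arm_neon.h):

#include <arm_neon.h>

// Interleaving store of two float32x4 vectors; reaches this selector as
// Intrinsic::aarch64_neon_st2 with VT == MVT::v4f32, i.e. the
// AArch64::ST2Twov4s case above.
void interleave(float *P, float32x4x2_t V) {
  vst2q_f32(P, V);
}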
3617
4.34k
  case AArch64ISD::LD2post: {
3618
70
    if (VT == MVT::v8i8) {
3619
6
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3620
6
      return;
3621
64
    } else if (VT == MVT::v16i8) {
3622
4
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3623
4
      return;
3624
60
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3625
10
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3626
10
      return;
3627
50
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3628
6
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3629
6
      return;
3630
44
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3631
4
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3632
4
      return;
3633
40
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3634
13
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3635
13
      return;
3636
27
    } else if (VT == MVT::v1i64 || 
VT == MVT::v1f6425
) {
3637
4
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3638
4
      return;
3639
23
    } else if (VT == MVT::v2i64 || 
VT == MVT::v2f6421
) {
3640
23
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3641
23
      return;
3642
23
    }
3643
0
    break;
3644
0
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
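
  // Unlike the DUP variants above, which replicate one loaded element to
  // every lane, the LD*LANEpost nodes load into a single lane of an
  // existing vector, so the conditions below group types by element width
  // rather than by total vector size.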
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
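
  // The post-increment stores produce only a chain and the updated base
  // pointer, so the element type is re-read from operand 1 (the first
  // vector being stored) before dispatching, rather than taken from the
  // node's result type.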
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}
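
Every case above repeats one idiom: compare the node's vector type against a
fixed list of MVTs and forward to a Select* helper with the opcode that
encodes the register arrangement. The following stand-alone sketch shows the
same dispatch shape for the LD2post table (the float types that share each
bucket are omitted for brevity); SimpleVT, Opc, and pickLD2PostOpcode are
illustrative stand-ins, not names from this file.

#include <cstdio>

// Stand-ins for MVT and the generated AArch64 opcode enum.
enum class SimpleVT { v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v1i64, v2i64 };
enum class Opc {
  LD2Twov8b_POST, LD2Twov16b_POST, LD2Twov4h_POST, LD2Twov8h_POST,
  LD2Twov2s_POST, LD2Twov4s_POST, LD1Twov1d_POST, LD2Twov2d_POST, Invalid
};

// Mirrors the if/else ladder in the LD2post case: each supported type maps
// to one post-increment opcode, and v1i64 falls back to the plain LD1 pair,
// exactly as in the listing above.
static Opc pickLD2PostOpcode(SimpleVT VT) {
  switch (VT) {
  case SimpleVT::v8i8:  return Opc::LD2Twov8b_POST;
  case SimpleVT::v16i8: return Opc::LD2Twov16b_POST;
  case SimpleVT::v4i16: return Opc::LD2Twov4h_POST;
  case SimpleVT::v8i16: return Opc::LD2Twov8h_POST;
  case SimpleVT::v2i32: return Opc::LD2Twov2s_POST;
  case SimpleVT::v4i32: return Opc::LD2Twov4s_POST;
  case SimpleVT::v1i64: return Opc::LD1Twov1d_POST;
  case SimpleVT::v2i64: return Opc::LD2Twov2d_POST;
  }
  return Opc::Invalid; // not reached for the enumerated types
}

int main() {
  // v1i64 deliberately selects the LD1 form, matching the listing.
  std::printf("%s\n", pickLD2PostOpcode(SimpleVT::v1i64) == Opc::LD1Twov1d_POST
                          ? "ok" : "mismatch");
  return 0;
}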

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
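
Callers do not instantiate AArch64DAGToDAGISel directly; the factory above is
invoked from the target's pass configuration. A simplified sketch of that
hook, based on the addInstSelector() override in AArch64TargetMachine.cpp
(ELF-specific TLS cleanup omitted):

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG instruction selector for this (sub)target.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  return false; // no error
}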