Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

 Line| Count|Source
    1|      |//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
    2|      |//
    3|      |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4|      |// See https://llvm.org/LICENSE.txt for license information.
    5|      |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6|      |//
    7|      |//===----------------------------------------------------------------------===//
    8|      |/// \file
    9|      |/// This file implements the targeting of the InstructionSelector class for
   10|      |/// AArch64.
   11|      |/// \todo This should be generated by TableGen.
   12|      |//===----------------------------------------------------------------------===//
   13|      |
   14|      |#include "AArch64InstrInfo.h"
   15|      |#include "AArch64MachineFunctionInfo.h"
   16|      |#include "AArch64RegisterBankInfo.h"
   17|      |#include "AArch64RegisterInfo.h"
   18|      |#include "AArch64Subtarget.h"
   19|      |#include "AArch64TargetMachine.h"
   20|      |#include "MCTargetDesc/AArch64AddressingModes.h"
   21|      |#include "llvm/ADT/Optional.h"
   22|      |#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
   23|      |#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
   24|      |#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
   25|      |#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
   26|      |#include "llvm/CodeGen/GlobalISel/Utils.h"
   27|      |#include "llvm/CodeGen/MachineBasicBlock.h"
   28|      |#include "llvm/CodeGen/MachineConstantPool.h"
   29|      |#include "llvm/CodeGen/MachineFunction.h"
   30|      |#include "llvm/CodeGen/MachineInstr.h"
   31|      |#include "llvm/CodeGen/MachineInstrBuilder.h"
   32|      |#include "llvm/CodeGen/MachineOperand.h"
   33|      |#include "llvm/CodeGen/MachineRegisterInfo.h"
   34|      |#include "llvm/IR/Type.h"
   35|      |#include "llvm/Support/Debug.h"
   36|      |#include "llvm/Support/raw_ostream.h"
   37|      |
   38|      |#define DEBUG_TYPE "aarch64-isel"
   39|      |
   40|      |using namespace llvm;
   41|      |
   42|      |namespace {
   43|      |
   44|      |#define GET_GLOBALISEL_PREDICATE_BITSET
   45|      |#include "AArch64GenGlobalISel.inc"
   46|      |#undef GET_GLOBALISEL_PREDICATE_BITSET
   47|      |
   48|      |class AArch64InstructionSelector : public InstructionSelector {
   49|      |public:
   50|      |  AArch64InstructionSelector(const AArch64TargetMachine &TM,
   51|      |                             const AArch64Subtarget &STI,
   52|      |                             const AArch64RegisterBankInfo &RBI);
   53|      |
   54|      |  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
   55|     0|  static const char *getName() { return DEBUG_TYPE; }
   56|      |
   57|      |private:
   58|      |  /// tblgen-erated 'select' implementation, used as the initial selector for
   59|      |  /// the patterns that don't require complex C++.
   60|      |  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
   61|      |
   62|      |  // A lowering phase that runs before any selection attempts.
   63|      |
   64|      |  void preISelLower(MachineInstr &I) const;
   65|      |
   66|      |  // An early selection function that runs before the selectImpl() call.
   67|      |  bool earlySelect(MachineInstr &I) const;
   68|      |
   69|      |  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
   70|      |  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
   71|      |
   72|      |  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
   73|      |  void contractCrossBankCopyIntoStore(MachineInstr &I,
   74|      |                                      MachineRegisterInfo &MRI) const;
   75|      |
   76|      |  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
   77|      |                          MachineRegisterInfo &MRI) const;
   78|      |  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
   79|      |                           MachineRegisterInfo &MRI) const;
   80|      |
   81|      |  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
   82|      |                           MachineRegisterInfo &MRI) const;
   83|      |
   84|      |  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
   85|      |  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
   86|      |
   87|      |  // Helper to generate an equivalent of scalar_to_vector into a new register,
   88|      |  // returned via 'Dst'.
   89|      |  MachineInstr *emitScalarToVector(unsigned EltSize,
   90|      |                                   const TargetRegisterClass *DstRC,
   91|      |                                   Register Scalar,
   92|      |                                   MachineIRBuilder &MIRBuilder) const;
   93|      |
   94|      |  /// Emit a lane insert into \p DstReg, or a new vector register if None is
   95|      |  /// provided.
   96|      |  ///
   97|      |  /// The lane inserted into is defined by \p LaneIdx. The vector source
   98|      |  /// register is given by \p SrcReg. The register containing the element is
   99|      |  /// given by \p EltReg.
  100|      |  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
  101|      |                               Register EltReg, unsigned LaneIdx,
  102|      |                               const RegisterBank &RB,
  103|      |                               MachineIRBuilder &MIRBuilder) const;
  104|      |  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  105|      |  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  106|      |  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  107|      |  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  108|      |
  109|      |  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
  110|      |                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  111|      |  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  112|      |  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  113|      |  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  114|      |  bool selectSplitVectorUnmerge(MachineInstr &I,
  115|      |                                MachineRegisterInfo &MRI) const;
  116|      |  bool selectIntrinsicWithSideEffects(MachineInstr &I,
  117|      |                                      MachineRegisterInfo &MRI) const;
  118|      |  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  119|      |  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  120|      |  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  121|      |  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  122|      |  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  123|      |  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  124|      |
  125|      |  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  126|      |  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
  127|      |                                         MachineIRBuilder &MIRBuilder) const;
  128|      |
  129|      |  // Emit a vector concat operation.
  130|      |  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
  131|      |                                 Register Op2,
  132|      |                                 MachineIRBuilder &MIRBuilder) const;
  133|      |  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
  134|      |                                   MachineOperand &Predicate,
  135|      |                                   MachineIRBuilder &MIRBuilder) const;
  136|      |  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
  137|      |                        MachineIRBuilder &MIRBuilder) const;
  138|      |  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
  139|      |                        MachineIRBuilder &MIRBuilder) const;
  140|      |  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
  141|      |                                     const RegisterBank &DstRB, LLT ScalarTy,
  142|      |                                     Register VecReg, unsigned LaneIdx,
  143|      |                                     MachineIRBuilder &MIRBuilder) const;
  144|      |
  145|      |  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  146|      |  /// materialized using a FMOV instruction, then update MI and return it.
  147|      |  /// Otherwise, do nothing and return a nullptr.
  148|      |  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
  149|      |                                     MachineRegisterInfo &MRI) const;
  150|      |
  151|      |  /// Emit a CSet for a compare.
  152|      |  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
  153|      |                                MachineIRBuilder &MIRBuilder) const;
  154|      |
  155|      |  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  156|      |  // We use these manually instead of using the importer since it doesn't
  157|      |  // support SDNodeXForm.
  158|      |  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  159|      |  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  160|      |  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  161|      |  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
  162|      |
  163|      |  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  164|      |
  165|      |  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
  166|      |                                            unsigned Size) const;
  167|      |
  168|   190|  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
  169|   190|    return selectAddrModeUnscaled(Root, 1);
  170|   190|  }
  171|   522|  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
  172|   522|    return selectAddrModeUnscaled(Root, 2);
  173|   522|  }
  174| 30.8k|  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
  175| 30.8k|    return selectAddrModeUnscaled(Root, 4);
  176| 30.8k|  }
  177| 13.1k|  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
  178| 13.1k|    return selectAddrModeUnscaled(Root, 8);
  179| 13.1k|  }
  180| 7.46k|  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
  181| 7.46k|    return selectAddrModeUnscaled(Root, 16);
  182| 7.46k|  }
  183|      |
  184|      |  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
  185|      |                                           unsigned Size) const;
  186|      |  template <int Width>
  187|  829k|  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
  188|  829k|    return selectAddrModeIndexed(Root, Width / 8);
  189|  829k|  }
  Instantiations of (anonymous namespace)::AArch64InstructionSelector::selectAddrModeIndexed<Width> (lines 187-189, execution counts per instantiation):
    <8>:   16.4k
    <16>:  5.08k
    <32>:  448k
    <64>:  314k
    <128>: 44.4k
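All five instantiation counts come from the single template body at lines 187-189: Width is a bit width, and each instantiation simply forwards a byte size to the non-template overload. A minimal standalone sketch of that dispatch (hypothetical, not part of this file):

    // Hypothetical sketch: a Width template parameter in bits becoming the
    // byte size handed to an addressing-mode helper, as in
    // selectAddrModeIndexed<Width> above.
    template <int Width> unsigned addrModeByteSize() {
      static_assert(Width % 8 == 0, "Width is a size in bits");
      return Width / 8; // e.g. Width = 64 selects the 8-byte indexed mode.
    }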
  190|      |
  191|      |  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
  192|      |                                     const MachineRegisterInfo &MRI) const;
  193|      |  ComplexRendererFns
  194|      |  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
  195|      |                                  unsigned SizeInBytes) const;
  196|      |  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  197|      |  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
  198|      |                                       unsigned SizeInBytes) const;
  199|      |
  200|      |  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
  201|      |
  202|      |  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  203|      |  void materializeLargeCMVal(MachineInstr &I, const Value *V,
  204|      |                             unsigned char OpFlags) const;
  205|      |
  206|      |  // Optimization methods.
  207|      |  bool tryOptVectorShuffle(MachineInstr &I) const;
  208|      |  bool tryOptVectorDup(MachineInstr &MI) const;
  209|      |  bool tryOptSelect(MachineInstr &MI) const;
  210|      |  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
  211|      |                                      MachineOperand &Predicate,
  212|      |                                      MachineIRBuilder &MIRBuilder) const;
  213|      |
  214|      |  const AArch64TargetMachine &TM;
  215|      |  const AArch64Subtarget &STI;
  216|      |  const AArch64InstrInfo &TII;
  217|      |  const AArch64RegisterInfo &TRI;
  218|      |  const AArch64RegisterBankInfo &RBI;
  219|      |
  220|      |#define GET_GLOBALISEL_PREDICATES_DECL
  221|      |#include "AArch64GenGlobalISel.inc"
  222|      |#undef GET_GLOBALISEL_PREDICATES_DECL
  223|      |
  224|      |// We declare the temporaries used by selectImpl() in the class to minimize the
  225|      |// cost of constructing placeholder values.
  226|      |#define GET_GLOBALISEL_TEMPORARIES_DECL
  227|      |#include "AArch64GenGlobalISel.inc"
  228|      |#undef GET_GLOBALISEL_TEMPORARIES_DECL
  229|      |};
  230|      |
  231|      |} // end anonymous namespace
  232|      |
  233|      |#define GET_GLOBALISEL_IMPL
  234|      |#include "AArch64GenGlobalISel.inc"
  235|      |#undef GET_GLOBALISEL_IMPL
  236|      |
  237|      |AArch64InstructionSelector::AArch64InstructionSelector(
  238|      |    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
  239|      |    const AArch64RegisterBankInfo &RBI)
  240|      |    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
  241|      |      TRI(*STI.getRegisterInfo()), RBI(RBI),
  242|      |#define GET_GLOBALISEL_PREDICATES_INIT
  243|      |#include "AArch64GenGlobalISel.inc"
  244|      |#undef GET_GLOBALISEL_PREDICATES_INIT
  245|      |#define GET_GLOBALISEL_TEMPORARIES_INIT
  246|      |#include "AArch64GenGlobalISel.inc"
  247|      |#undef GET_GLOBALISEL_TEMPORARIES_INIT
  248| 9.10k|{
  249| 9.10k|}
  250|      |
  251|      |// FIXME: This should be target-independent, inferred from the types declared
  252|      |// for each class in the bank.
  253|      |static const TargetRegisterClass *
  254|      |getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
  255|      |                         const RegisterBankInfo &RBI,
  256|  584k|                         bool GetAllRegSet = false) {
  257|  584k|  if (RB.getID() == AArch64::GPRRegBankID) {
  258|  580k|    if (Ty.getSizeInBits() <= 32)
  259|  370k|      return GetAllRegSet ? &AArch64::GPR32allRegClass
  260|  370k|                          : &AArch64::GPR32RegClass;
  261|  210k|    if (Ty.getSizeInBits() == 64)
  262|  210k|      return GetAllRegSet ? &AArch64::GPR64allRegClass
  263|  210k|                          : &AArch64::GPR64RegClass;
  264|     0|    return nullptr;
  265|     0|  }
  266| 3.93k|
  267| 3.93k|  if (RB.getID() == AArch64::FPRRegBankID) {
  268| 3.93k|    if (Ty.getSizeInBits() <= 16)
  269|    35|      return &AArch64::FPR16RegClass;
  270| 3.89k|    if (Ty.getSizeInBits() == 32)
  271| 1.79k|      return &AArch64::FPR32RegClass;
  272| 2.10k|    if (Ty.getSizeInBits() == 64)
  273| 1.96k|      return &AArch64::FPR64RegClass;
  274|   140|    if (Ty.getSizeInBits() == 128)
  275|   140|      return &AArch64::FPR128RegClass;
  276|     0|    return nullptr;
  277|     0|  }
  278|     0|
  279|     0|  return nullptr;
  280|     0|}
  281|      |
  282|      |/// Given a register bank, and size in bits, return the smallest register class
  283|      |/// that can represent that combination.
  284|      |static const TargetRegisterClass *
  285|      |getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
  286| 7.82M|                      bool GetAllRegSet = false) {
  287| 7.82M|  unsigned RegBankID = RB.getID();
  288| 7.82M|
  289| 7.82M|  if (RegBankID == AArch64::GPRRegBankID) {
  290| 7.72M|    if (SizeInBits <= 32)
  291| 1.48M|      return GetAllRegSet ? &AArch64::GPR32allRegClass
  292| 1.48M|                          : &AArch64::GPR32RegClass;
  293| 6.24M|    if (SizeInBits == 64)
  294| 6.24M|      return GetAllRegSet ? &AArch64::GPR64allRegClass
  295| 6.24M|                          : &AArch64::GPR64RegClass;
  296| 93.1k|  }
  297| 93.1k|
  298| 93.1k|  if (RegBankID == AArch64::FPRRegBankID) {
  299| 93.1k|    switch (SizeInBits) {
  300| 93.1k|    default:
  301|     0|      return nullptr;
  302| 93.1k|    case 8:
  303|     4|      return &AArch64::FPR8RegClass;
  304| 93.1k|    case 16:
  305| 2.54k|      return &AArch64::FPR16RegClass;
  306| 93.1k|    case 32:
  307| 13.0k|      return &AArch64::FPR32RegClass;
  308| 93.1k|    case 64:
  309| 72.9k|      return &AArch64::FPR64RegClass;
  310| 93.1k|    case 128:
  311| 4.56k|      return &AArch64::FPR128RegClass;
  312|     0|    }
  313|     0|  }
  314|     0|
  315|     0|  return nullptr;
  316|     0|}
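Both helpers above map a (bank, size) pair to a register class, with GPRs capped at 64 bits and anything of 32 bits or less landing in GPR32. A reduced sketch of that rule (hypothetical string-returning stand-in for the real TargetRegisterClass pointers):

    #include <cstdio>

    // Hypothetical stand-in for getMinClassForRegBank's GPR branch.
    static const char *minGPRClass(unsigned SizeInBits) {
      if (SizeInBits <= 32) return "GPR32";
      if (SizeInBits == 64) return "GPR64";
      return nullptr; // GPRs cannot hold more than 64 bits.
    }

    int main() { std::printf("%s\n", minGPRClass(16)); } // prints "GPR32"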
  317|      |
  318|      |/// Returns the correct subregister to use for a given register class.
  319|      |static bool getSubRegForClass(const TargetRegisterClass *RC,
  320| 13.2k|                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  321| 13.2k|  switch (TRI.getRegSizeInBits(*RC)) {
  322| 13.2k|  case 8:
  323|     0|    SubReg = AArch64::bsub;
  324|     0|    break;
  325| 13.2k|  case 16:
  326| 1.99k|    SubReg = AArch64::hsub;
  327| 1.99k|    break;
  328| 13.2k|  case 32:
  329|     0|    if (RC == &AArch64::GPR32RegClass)
  330|     0|      SubReg = AArch64::sub_32;
  331|     0|    else
  332|     0|      SubReg = AArch64::ssub;
  333|     0|    break;
  334| 13.2k|  case 64:
  335| 11.2k|    SubReg = AArch64::dsub;
  336| 11.2k|    break;
  337| 13.2k|  default:
  338|     0|    LLVM_DEBUG(
  339|     0|        dbgs() << "Couldn't find appropriate subregister for register class.");
  340|     0|    return false;
  341| 13.2k|  }
  342| 13.2k|
  343| 13.2k|  return true;
  344| 13.2k|}
  345|      |
  346|      |/// Check whether \p I is a currently unsupported binary operation:
  347|      |/// - it has an unsized type
  348|      |/// - an operand is not a vreg
  349|      |/// - all operands are not in the same bank
  350|      |/// These are checks that should someday live in the verifier, but right now,
  351|      |/// these are mostly limitations of the aarch64 selector.
  352|      |static bool unsupportedBinOp(const MachineInstr &I,
  353|      |                             const AArch64RegisterBankInfo &RBI,
  354|      |                             const MachineRegisterInfo &MRI,
  355|  331k|                             const AArch64RegisterInfo &TRI) {
  356|  331k|  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  357|  331k|  if (!Ty.isValid()) {
  358|     0|    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
  359|     0|    return true;
  360|     0|  }
  361|  331k|
  362|  331k|  const RegisterBank *PrevOpBank = nullptr;
  363|  993k|  for (auto &MO : I.operands()) {
  364|  993k|    // FIXME: Support non-register operands.
  365|  993k|    if (!MO.isReg()) {
  366|     0|      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
  367|     0|      return true;
  368|     0|    }
  369|  993k|
  370|  993k|    // FIXME: Can generic operations have physical registers operands? If
  371|  993k|    // so, this will need to be taught about that, and we'll need to get the
  372|  993k|    // bank out of the minimal class for the register.
  373|  993k|    // Either way, this needs to be documented (and possibly verified).
  374|  993k|    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
  375|     0|      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
  376|     0|      return true;
  377|     0|    }
  378|  993k|
  379|  993k|    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
  380|  993k|    if (!OpBank) {
  381|     0|      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
  382|     0|      return true;
  383|     0|    }
  384|  993k|
  385|  993k|    if (PrevOpBank && OpBank != PrevOpBank) {
  386|     0|      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  387|     0|      return true;
  388|     0|    }
  389|  993k|    PrevOpBank = OpBank;
  390|  993k|  }
  391|  331k|  return false;
  392|  331k|}
  393|      |
  394|      |/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
  395|      |/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
  396|      |/// and of size \p OpSize.
  397|      |/// \returns \p GenericOpc if the combination is unsupported.
  398|      |static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
  399|  331k|                               unsigned OpSize) {
  400|  331k|  switch (RegBankID) {
  401|  331k|  case AArch64::GPRRegBankID:
  402|  331k|    if (OpSize == 32) {
  403| 5.84k|      switch (GenericOpc) {
  404| 5.84k|      case TargetOpcode::G_SHL:
  405| 1.97k|        return AArch64::LSLVWr;
  406| 5.84k|      case TargetOpcode::G_LSHR:
  407| 2.21k|        return AArch64::LSRVWr;
  408| 5.84k|      case TargetOpcode::G_ASHR:
  409| 1.66k|        return AArch64::ASRVWr;
  410| 5.84k|      default:
  411|     0|        return GenericOpc;
  412|  325k|      }
  413|  325k|    } else if (OpSize == 64) {
  414|  325k|      switch (GenericOpc) {
  415|  325k|      case TargetOpcode::G_GEP:
  416|  325k|        return AArch64::ADDXrr;
  417|  325k|      case TargetOpcode::G_SHL:
  418|     0|        return AArch64::LSLVXr;
  419|  325k|      case TargetOpcode::G_LSHR:
  420|     0|        return AArch64::LSRVXr;
  421|  325k|      case TargetOpcode::G_ASHR:
  422|     0|        return AArch64::ASRVXr;
  423|  325k|      default:
  424|     0|        return GenericOpc;
  425|     0|      }
  426|     0|    }
  427|     0|    break;
  428|     7|  case AArch64::FPRRegBankID:
  429|     7|    switch (OpSize) {
  430|     7|    case 32:
  431|     0|      switch (GenericOpc) {
  432|     0|      case TargetOpcode::G_FADD:
  433|     0|        return AArch64::FADDSrr;
  434|     0|      case TargetOpcode::G_FSUB:
  435|     0|        return AArch64::FSUBSrr;
  436|     0|      case TargetOpcode::G_FMUL:
  437|     0|        return AArch64::FMULSrr;
  438|     0|      case TargetOpcode::G_FDIV:
  439|     0|        return AArch64::FDIVSrr;
  440|     0|      default:
  441|     0|        return GenericOpc;
  442|     0|      }
  443|     1|    case 64:
  444|     1|      switch (GenericOpc) {
  445|     1|      case TargetOpcode::G_FADD:
  446|     0|        return AArch64::FADDDrr;
  447|     1|      case TargetOpcode::G_FSUB:
  448|     0|        return AArch64::FSUBDrr;
  449|     1|      case TargetOpcode::G_FMUL:
  450|     0|        return AArch64::FMULDrr;
  451|     1|      case TargetOpcode::G_FDIV:
  452|     0|        return AArch64::FDIVDrr;
  453|     1|      case TargetOpcode::G_OR:
  454|     0|        return AArch64::ORRv8i8;
  455|     1|      default:
  456|     1|        return GenericOpc;
  457|     6|      }
  458|     6|    }
  459|     6|    break;
  460|     6|  }
  461|     6|  return GenericOpc;
  462|     6|}
  463|      |
  464|      |/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
  465|      |/// appropriate for the (value) register bank \p RegBankID and of memory access
  466|      |/// size \p OpSize.  This returns the variant with the base+unsigned-immediate
  467|      |/// addressing mode (e.g., LDRXui).
  468|      |/// \returns \p GenericOpc if the combination is unsupported.
  469|      |static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
  470|  785k|                                    unsigned OpSize) {
  471|  785k|  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  472|  785k|  switch (RegBankID) {
  473|  785k|  case AArch64::GPRRegBankID:
  474|  785k|    switch (OpSize) {
  475|  785k|    case 8:
  476|  176k|      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
  477|  785k|    case 16:
  478| 73.4k|      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
  479|  785k|    case 32:
  480| 8.44k|      return isStore ? AArch64::STRWui : AArch64::LDRWui;
  481|  785k|    case 64:
  482|  527k|      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  483|     0|    }
  484|     0|    break;
  485|     3|  case AArch64::FPRRegBankID:
  486|     3|    switch (OpSize) {
  487|     3|    case 8:
  488|     3|      return isStore ? AArch64::STRBui : AArch64::LDRBui;
  489|     3|    case 16:
  490|     0|      return isStore ? AArch64::STRHui : AArch64::LDRHui;
  491|     3|    case 32:
  492|     0|      return isStore ? AArch64::STRSui : AArch64::LDRSui;
  493|     3|    case 64:
  494|     0|      return isStore ? AArch64::STRDui : AArch64::LDRDui;
  495|     0|    }
  496|     0|    break;
  497|     0|  }
  498|     0|  return GenericOpc;
  499|     0|}
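The "ui" opcodes chosen above use the base + unsigned scaled immediate addressing mode: the encoded 12-bit offset is implicitly multiplied by the access size. A sketch of the resulting legality check (stated here as an assumption about the encoding, not code from this file):

    #include <cstdint>

    // A byte offset fits an LDR/STR unsigned-immediate form only if it is
    // aligned to the access size and the scaled value fits in 12 bits.
    static bool fitsUImm12Scaled(uint64_t ByteOffset, uint64_t AccessSize) {
      return ByteOffset % AccessSize == 0 && ByteOffset / AccessSize < 4096;
    }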
  500|      |
  501|      |#ifndef NDEBUG
  502|      |/// Helper function that verifies that we have a valid copy at the end of
  503|      |/// selectCopy. Verifies that the source and dest have the expected sizes and
  504|      |/// then returns true.
  505|      |static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
  506|      |                        const MachineRegisterInfo &MRI,
  507|      |                        const TargetRegisterInfo &TRI,
  508|      |                        const RegisterBankInfo &RBI) {
  509|      |  const unsigned DstReg = I.getOperand(0).getReg();
  510|      |  const unsigned SrcReg = I.getOperand(1).getReg();
  511|      |  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  512|      |  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  513|      |
  514|      |  // Make sure the size of the source and dest line up.
  515|      |  assert(
  516|      |      (DstSize == SrcSize ||
  517|      |       // Copies are a mean to setup initial types, the number of
  518|      |       // bits may not exactly match.
  519|      |       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
  520|      |       // Copies are a mean to copy bits around, as long as we are
  521|      |       // on the same register class, that's fine. Otherwise, that
  522|      |       // means we need some SUBREG_TO_REG or AND & co.
  523|      |       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
  524|      |      "Copy with different width?!");
  525|      |
  526|      |  // Check the size of the destination.
  527|      |  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
  528|      |         "GPRs cannot get more than 64-bit width values");
  529|      |
  530|      |  return true;
  531|      |}
  532|      |#endif
  533|      |
  534|      |/// Helper function for selectCopy. Inserts a subregister copy from
  535|      |/// \p *From to \p *To, linking it up to \p I.
  536|      |///
  537|      |/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
  538|      |///
  539|      |/// CopyReg (From class) = COPY SrcReg
  540|      |/// SubRegCopy (To class) = COPY CopyReg:SubReg
  541|      |/// Dst = COPY SubRegCopy
  542|      |static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
  543|      |                                  const RegisterBankInfo &RBI, unsigned SrcReg,
  544|      |                                  const TargetRegisterClass *From,
  545|      |                                  const TargetRegisterClass *To,
  546| 1.99k|                                  unsigned SubReg) {
  547| 1.99k|  MachineIRBuilder MIB(I);
  548| 1.99k|  auto Copy = MIB.buildCopy({From}, {SrcReg});
  549| 1.99k|  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
  550| 1.99k|                        .addReg(Copy.getReg(0), 0, SubReg);
  551| 1.99k|  MachineOperand &RegOp = I.getOperand(1);
  552| 1.99k|  RegOp.setReg(SubRegCopy.getReg(0));
  553| 1.99k|
  554| 1.99k|  // It's possible that the destination register won't be constrained. Make
  555| 1.99k|  // sure that happens.
  556| 1.99k|  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
  557| 1.97k|    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
  558| 1.99k|
  559| 1.99k|  return true;
  560| 1.99k|}
  561|      |
  562|      |/// Helper function to get the source and destination register classes for a
  563|      |/// copy. Returns a std::pair containing the source register class for the
  564|      |/// copy, and the destination register class for the copy. If a register class
  565|      |/// cannot be determined, then it will be nullptr.
  566|      |static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
  567|      |getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
  568|      |                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
  569| 3.90M|                     const RegisterBankInfo &RBI) {
  570| 3.90M|  unsigned DstReg = I.getOperand(0).getReg();
  571| 3.90M|  unsigned SrcReg = I.getOperand(1).getReg();
  572| 3.90M|  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  573| 3.90M|  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  574| 3.90M|  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  575| 3.90M|  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  576| 3.90M|
  577| 3.90M|  // Special casing for cross-bank copies of s1s. We can technically represent
  578| 3.90M|  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  579| 3.90M|  // bits. So, we need to put the FPR on 32 bits as well.
  580| 3.90M|  //
  581| 3.90M|  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  582| 3.90M|  // then we can pull it into the helpers that get the appropriate class for a
  583| 3.90M|  // register bank. Or make a new helper that carries along some constraint
  584| 3.90M|  // information.
  585| 3.90M|  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
  586|     7|    SrcSize = DstSize = 32;
  587| 3.90M|
  588| 3.90M|  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
  589| 3.90M|          getMinClassForRegBank(DstRegBank, DstSize, true)};
  590| 3.90M|}
  591|      |
  592|      |static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
  593|      |                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
  594| 3.90M|                       const RegisterBankInfo &RBI) {
  595| 3.90M|
  596| 3.90M|  unsigned DstReg = I.getOperand(0).getReg();
  597| 3.90M|  unsigned SrcReg = I.getOperand(1).getReg();
  598| 3.90M|  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  599| 3.90M|  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  600| 3.90M|
  601| 3.90M|  // Find the correct register classes for the source and destination registers.
  602| 3.90M|  const TargetRegisterClass *SrcRC;
  603| 3.90M|  const TargetRegisterClass *DstRC;
  604| 3.90M|  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
  605| 3.90M|
  606| 3.90M|  if (!DstRC) {
  607|     0|    LLVM_DEBUG(dbgs() << "Unexpected dest size "
  608|     0|                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
  609|     0|    return false;
  610|     0|  }
  611| 3.90M|
  612| 3.90M|  // A couple helpers below, for making sure that the copy we produce is valid.
  613| 3.90M|
  614| 3.90M|  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  615| 3.90M|  // to verify that the src and dst are the same size, since that's handled by
  616| 3.90M|  // the SUBREG_TO_REG.
  617| 3.90M|  bool KnownValid = false;
  618| 3.90M|
  619| 3.90M|  // Returns true, or asserts if something we don't expect happens. Instead of
  620| 3.90M|  // returning true, we return isValidCopy() to ensure that we verify the
  621| 3.90M|  // result.
  622| 3.90M|  auto CheckCopy = [&]() {
  623| 3.90M|    // If we have a bitcast or something, we can't have physical registers.
  624| 3.90M|    assert(
  625| 3.90M|        (I.isCopy() ||
  626| 3.90M|         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
  627| 3.90M|          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
  628| 3.90M|        "No phys reg on generic operator!");
  629| 3.90M|    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
  630| 3.90M|    (void)KnownValid;
  631| 3.90M|    return true;
  632| 3.90M|  };
  633| 3.90M|
  634| 3.90M|  // Is this a copy? If so, then we may need to insert a subregister copy, or
  635| 3.90M|  // a SUBREG_TO_REG.
  636| 3.90M|  if (I.isCopy()) {
  637| 3.78M|    // Yes. Check if there's anything to fix up.
  638| 3.78M|    if (!SrcRC) {
  639|     0|      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  640|     0|      return false;
  641|     0|    }
  642| 3.78M|
  643| 3.78M|    // Is this a cross-bank copy?
  644| 3.78M|    if (DstRegBank.getID() != SrcRegBank.getID()) {
  645| 6.55k|      // If we're doing a cross-bank copy on different-sized registers, we need
  646| 6.55k|      // to do a bit more work.
  647| 6.55k|      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
  648| 6.55k|      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
  649| 6.55k|
  650| 6.55k|      if (SrcSize > DstSize) {
  651| 1.99k|        // We're doing a cross-bank copy into a smaller register. We need a
  652| 1.99k|        // subregister copy. First, get a register class that's on the same bank
  653| 1.99k|        // as the destination, but the same size as the source.
  654| 1.99k|        const TargetRegisterClass *SubregRC =
  655| 1.99k|            getMinClassForRegBank(DstRegBank, SrcSize, true);
  656| 1.99k|        assert(SubregRC && "Didn't get a register class for subreg?");
  657| 1.99k|
  658| 1.99k|        // Get the appropriate subregister for the destination.
  659| 1.99k|        unsigned SubReg = 0;
  660| 1.99k|        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
  661|     0|          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
  662|     0|          return false;
  663|     0|        }
  664| 1.99k|
  665| 1.99k|        // Now, insert a subregister copy using the new register class.
  666| 1.99k|        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
  667| 1.99k|        return CheckCopy();
  668| 1.99k|      }
  669| 4.55k|
  670| 4.55k|      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
  671| 4.55k|               SrcSize == 16) {
  672|    88|        // Special case for FPR16 to GPR32.
  673|    88|        // FIXME: This can probably be generalized like the above case.
  674|    88|        unsigned PromoteReg =
  675|    88|            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
  676|    88|        BuildMI(*I.getParent(), I, I.getDebugLoc(),
  677|    88|                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
  678|    88|            .addImm(0)
  679|    88|            .addUse(SrcReg)
  680|    88|            .addImm(AArch64::hsub);
  681|    88|        MachineOperand &RegOp = I.getOperand(1);
  682|    88|        RegOp.setReg(PromoteReg);
  683|    88|
  684|    88|        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
  685|    88|        KnownValid = true;
  686|    88|      }
  687| 6.55k|    }
  688| 3.78M|
  689| 3.78M|    // If the destination is a physical register, then there's nothing to
  690| 3.78M|    // change, so we're done.
  691| 3.78M|    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
  692| 2.52M|      return CheckCopy();
  693| 1.37M|  }
  694| 1.37M|
  695| 1.37M|  // No need to constrain SrcReg. It will get constrained when we hit another
  696| 1.37M|  // of its use or its defs. Copies do not have constraints.
  697| 1.37M|  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  698|     0|    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  699|     0|                      << " operand\n");
  700|     0|    return false;
  701|     0|  }
  702| 1.37M|  I.setDesc(TII.get(AArch64::COPY));
  703| 1.37M|  return CheckCopy();
  704| 1.37M|}
  705|      |
  706|   313|static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  707|   313|  if (!DstTy.isScalar() || !SrcTy.isScalar())
  708|   313|    return GenericOpc;
  709|     0|
  710|     0|  const unsigned DstSize = DstTy.getSizeInBits();
  711|     0|  const unsigned SrcSize = SrcTy.getSizeInBits();
  712|     0|
  713|     0|  switch (DstSize) {
  714|     0|  case 32:
  715|     0|    switch (SrcSize) {
  716|     0|    case 32:
  717|     0|      switch (GenericOpc) {
  718|     0|      case TargetOpcode::G_SITOFP:
  719|     0|        return AArch64::SCVTFUWSri;
  720|     0|      case TargetOpcode::G_UITOFP:
  721|     0|        return AArch64::UCVTFUWSri;
  722|     0|      case TargetOpcode::G_FPTOSI:
  723|     0|        return AArch64::FCVTZSUWSr;
  724|     0|      case TargetOpcode::G_FPTOUI:
  725|     0|        return AArch64::FCVTZUUWSr;
  726|     0|      default:
  727|     0|        return GenericOpc;
  728|     0|      }
  729|     0|    case 64:
  730|     0|      switch (GenericOpc) {
  731|     0|      case TargetOpcode::G_SITOFP:
  732|     0|        return AArch64::SCVTFUXSri;
  733|     0|      case TargetOpcode::G_UITOFP:
  734|     0|        return AArch64::UCVTFUXSri;
  735|     0|      case TargetOpcode::G_FPTOSI:
  736|     0|        return AArch64::FCVTZSUWDr;
  737|     0|      case TargetOpcode::G_FPTOUI:
  738|     0|        return AArch64::FCVTZUUWDr;
  739|     0|      default:
  740|     0|        return GenericOpc;
  741|     0|      }
  742|     0|    default:
  743|     0|      return GenericOpc;
  744|     0|    }
  745|     0|  case 64:
  746|     0|    switch (SrcSize) {
  747|     0|    case 32:
  748|     0|      switch (GenericOpc) {
  749|     0|      case TargetOpcode::G_SITOFP:
  750|     0|        return AArch64::SCVTFUWDri;
  751|     0|      case TargetOpcode::G_UITOFP:
  752|     0|        return AArch64::UCVTFUWDri;
  753|     0|      case TargetOpcode::G_FPTOSI:
  754|     0|        return AArch64::FCVTZSUXSr;
  755|     0|      case TargetOpcode::G_FPTOUI:
  756|     0|        return AArch64::FCVTZUUXSr;
  757|     0|      default:
  758|     0|        return GenericOpc;
  759|     0|      }
  760|     0|    case 64:
  761|     0|      switch (GenericOpc) {
  762|     0|      case TargetOpcode::G_SITOFP:
  763|     0|        return AArch64::SCVTFUXDri;
  764|     0|      case TargetOpcode::G_UITOFP:
  765|     0|        return AArch64::UCVTFUXDri;
  766|     0|      case TargetOpcode::G_FPTOSI:
  767|     0|        return AArch64::FCVTZSUXDr;
  768|     0|      case TargetOpcode::G_FPTOUI:
  769|     0|        return AArch64::FCVTZUUXDr;
  770|     0|      default:
  771|     0|        return GenericOpc;
  772|     0|      }
  773|     0|    default:
  774|     0|      return GenericOpc;
  775|     0|    }
  776|     0|  default:
  777|     0|    return GenericOpc;
  778|     0|  };
  779|     0|  return GenericOpc;
  780|     0|}
  781|      |
  782|      |static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
  783| 64.3k|                                const RegisterBankInfo &RBI) {
  784| 64.3k|  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  785| 64.3k|  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
  786| 64.3k|               AArch64::GPRRegBankID);
  787| 64.3k|  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  788| 64.3k|  if (Ty == LLT::scalar(32))
  789| 41.3k|    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  790| 22.9k|  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
  791| 22.9k|    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  792|     0|  return 0;
  793|     0|}
  794|      |
  795|      |/// Helper function to select the opcode for a G_FCMP.
  796| 15.9k|static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  797| 15.9k|  // If this is a compare against +0.0, then we don't have to explicitly
  798| 15.9k|  // materialize a constant.
  799| 15.9k|  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  800| 15.9k|  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  801| 15.9k|  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  802| 15.9k|  if (OpSize != 32 && OpSize != 64)
  803|     0|    return 0;
  804| 15.9k|  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
  805| 15.9k|                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  806| 15.9k|  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
  807| 15.9k|}
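The 2x2 table lookup above picks register vs. immediate and 32- vs. 64-bit compares in a single step. A self-contained sketch of the same shape (hypothetical string table in place of the real opcode enums):

    #include <cstdio>

    int main() {
      const char *CmpOpcTbl[2][2] = {{"FCMPSrr", "FCMPDrr"},
                                     {"FCMPSri", "FCMPDri"}};
      bool ShouldUseImm = true; // comparing against +0.0
      unsigned OpSize = 64;
      std::printf("%s\n", CmpOpcTbl[ShouldUseImm][OpSize == 64]); // FCMPDri
    }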
  808|      |
  809|      |/// Returns true if \p P is an unsigned integer comparison predicate.
  810|  435k|static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  811|  435k|  switch (P) {
  812|  435k|  default:
  813|  359k|    return false;
  814|  435k|  case CmpInst::ICMP_UGT:
  815| 76.2k|  case CmpInst::ICMP_UGE:
  816| 76.2k|  case CmpInst::ICMP_ULT:
  817| 76.2k|  case CmpInst::ICMP_ULE:
  818| 76.2k|    return true;
  819|  435k|  }
  820|  435k|}
  821|      |
  822|  871k|static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  823|  871k|  switch (P) {
  824|  871k|  default:
  825|     0|    llvm_unreachable("Unknown condition code!");
  826|  871k|  case CmpInst::ICMP_NE:
  827|  205k|    return AArch64CC::NE;
  828|  871k|  case CmpInst::ICMP_EQ:
  829|  228k|    return AArch64CC::EQ;
  830|  871k|  case CmpInst::ICMP_SGT:
  831| 94.4k|    return AArch64CC::GT;
  832|  871k|  case CmpInst::ICMP_SGE:
  833| 34.5k|    return AArch64CC::GE;
  834|  871k|  case CmpInst::ICMP_SLT:
  835| 80.3k|    return AArch64CC::LT;
  836|  871k|  case CmpInst::ICMP_SLE:
  837| 75.4k|    return AArch64CC::LE;
  838|  871k|  case CmpInst::ICMP_UGT:
  839| 28.0k|    return AArch64CC::HI;
  840|  871k|  case CmpInst::ICMP_UGE:
  841| 38.8k|    return AArch64CC::HS;
  842|  871k|  case CmpInst::ICMP_ULT:
  843| 65.9k|    return AArch64CC::LO;
  844|  871k|  case CmpInst::ICMP_ULE:
  845| 19.7k|    return AArch64CC::LS;
  846|  871k|  }
  847|  871k|}
  848|      |
  849|      |static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
  850|      |                                      AArch64CC::CondCode &CondCode,
  851| 15.9k|                                      AArch64CC::CondCode &CondCode2) {
  852| 15.9k|  CondCode2 = AArch64CC::AL;
  853| 15.9k|  switch (P) {
  854| 15.9k|  default:
  855|     0|    llvm_unreachable("Unknown FP condition!");
  856| 15.9k|  case CmpInst::FCMP_OEQ:
  857| 1.75k|    CondCode = AArch64CC::EQ;
  858| 1.75k|    break;
  859| 15.9k|  case CmpInst::FCMP_OGT:
  860| 3.29k|    CondCode = AArch64CC::GT;
  861| 3.29k|    break;
  862| 15.9k|  case CmpInst::FCMP_OGE:
  863|   137|    CondCode = AArch64CC::GE;
  864|   137|    break;
  865| 15.9k|  case CmpInst::FCMP_OLT:
  866| 6.67k|    CondCode = AArch64CC::MI;
  867| 6.67k|    break;
  868| 15.9k|  case CmpInst::FCMP_OLE:
  869|    41|    CondCode = AArch64CC::LS;
  870|    41|    break;
  871| 15.9k|  case CmpInst::FCMP_ONE:
  872|    15|    CondCode = AArch64CC::MI;
  873|    15|    CondCode2 = AArch64CC::GT;
  874|    15|    break;
  875| 15.9k|  case CmpInst::FCMP_ORD:
  876|    23|    CondCode = AArch64CC::VC;
  877|    23|    break;
  878| 15.9k|  case CmpInst::FCMP_UNO:
  879|    53|    CondCode = AArch64CC::VS;
  880|    53|    break;
  881| 15.9k|  case CmpInst::FCMP_UEQ:
  882|    13|    CondCode = AArch64CC::EQ;
  883|    13|    CondCode2 = AArch64CC::VS;
  884|    13|    break;
  885| 15.9k|  case CmpInst::FCMP_UGT:
  886|   272|    CondCode = AArch64CC::HI;
  887|   272|    break;
  888| 15.9k|  case CmpInst::FCMP_UGE:
  889|    50|    CondCode = AArch64CC::PL;
  890|    50|    break;
  891| 15.9k|  case CmpInst::FCMP_ULT:
  892| 3.07k|    CondCode = AArch64CC::LT;
  893| 3.07k|    break;
  894| 15.9k|  case CmpInst::FCMP_ULE:
  895|    48|    CondCode = AArch64CC::LE;
  896|    48|    break;
  897| 15.9k|  case CmpInst::FCMP_UNE:
  898|   458|    CondCode = AArch64CC::NE;
  899|   458|    break;
  900| 15.9k|  }
  901| 15.9k|}
  902|      |
  903|      |bool AArch64InstructionSelector::selectCompareBranch(
  904|  686k|    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  905|  686k|
  906|  686k|  const Register CondReg = I.getOperand(0).getReg();
  907|  686k|  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  908|  686k|  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  909|  686k|  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
  910|  686k|    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  911|  686k|  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
  912| 73.6k|    return false;
  913|  613k|
  914|  613k|  Register LHS = CCMI->getOperand(2).getReg();
  915|  613k|  Register RHS = CCMI->getOperand(3).getReg();
  916|  613k|  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  917|  613k|  if (!VRegAndVal)
  918|  157k|    std::swap(RHS, LHS);
  919|  613k|
  920|  613k|  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  921|  613k|  if (!VRegAndVal || VRegAndVal->Value != 0) {
  922|  245k|    MachineIRBuilder MIB(I);
  923|  245k|    // If we can't select a CBZ then emit a cmp + Bcc.
  924|  245k|    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
  925|  245k|                            CCMI->getOperand(1), MIB))
  926|     0|      return false;
  927|  245k|    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
  928|  245k|        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
  929|  245k|    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  930|  245k|    I.eraseFromParent();
  931|  245k|    return true;
  932|  245k|  }
  933|  367k|
  934|  367k|  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  935|  367k|  if (RB.getID() != AArch64::GPRRegBankID)
  936|     0|    return false;
  937|  367k|
  938|  367k|  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  939|  367k|  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
  940| 45.7k|    return false;
  941|  322k|
  942|  322k|  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  943|  322k|  unsigned CBOpc = 0;
  944|  322k|  if (CmpWidth <= 32)
  945|  108k|    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  946|  213k|  else if (CmpWidth == 64)
  947|  213k|    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  948|     0|  else
  949|     0|    return false;
  950|  322k|
  951|  322k|  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
  952|  322k|      .addUse(LHS)
  953|  322k|      .addMBB(DestMBB)
  954|  322k|      .constrainAllUses(TII, TRI, RBI);
  955|  322k|
  956|  322k|  I.eraseFromParent();
  957|  322k|  return true;
  958|  322k|}
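The fold above only fires for an equality or inequality compare against zero; everything else goes through emitIntegerCompare plus Bcc. A minimal sketch of that decision (hypothetical types, not this file's API):

    enum class Pred { EQ, NE, Other };

    // CBZ/CBNZ replace compare+branch only for x == 0 / x != 0.
    static bool canUseCBZ(bool RHSIsZero, Pred P) {
      return RHSIsZero && (P == Pred::EQ || P == Pred::NE);
    }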
  959|      |
  960|      |bool AArch64InstructionSelector::selectVectorSHL(
  961|   111|    MachineInstr &I, MachineRegisterInfo &MRI) const {
  962|   111|  assert(I.getOpcode() == TargetOpcode::G_SHL);
  963|   111|  Register DstReg = I.getOperand(0).getReg();
  964|   111|  const LLT Ty = MRI.getType(DstReg);
  965|   111|  Register Src1Reg = I.getOperand(1).getReg();
  966|   111|  Register Src2Reg = I.getOperand(2).getReg();
  967|   111|
  968|   111|  if (!Ty.isVector())
  969|     0|    return false;
  970|   111|
  971|   111|  unsigned Opc = 0;
  972|   111|  if (Ty == LLT::vector(4, 32)) {
  973|    75|    Opc = AArch64::USHLv4i32;
  974|    75|  } else if (Ty == LLT::vector(2, 32)) {
  975|    16|    Opc = AArch64::USHLv2i32;
  976|    20|  } else {
  977|    20|    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
  978|    20|    return false;
  979|    20|  }
  980|    91|
  981|    91|  MachineIRBuilder MIB(I);
  982|    91|  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  983|    91|  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  984|    91|  I.eraseFromParent();
  985|    91|  return true;
  986|    91|}
  987|      |
  988|      |bool AArch64InstructionSelector::selectVectorASHR(
  989|    30|    MachineInstr &I, MachineRegisterInfo &MRI) const {
  990|    30|  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  991|    30|  Register DstReg = I.getOperand(0).getReg();
  992|    30|  const LLT Ty = MRI.getType(DstReg);
  993|    30|  Register Src1Reg = I.getOperand(1).getReg();
  994|    30|  Register Src2Reg = I.getOperand(2).getReg();
  995|    30|
  996|    30|  if (!Ty.isVector())
  997|     0|    return false;
  998|    30|
  999|    30|  // There is not a shift right register instruction, but the shift left
 1000|    30|  // register instruction takes a signed value, where negative numbers specify a
 1001|    30|  // right shift.
 1002|    30|
 1003|    30|  unsigned Opc = 0;
 1004|    30|  unsigned NegOpc = 0;
 1005|    30|  const TargetRegisterClass *RC = nullptr;
 1006|    30|  if (Ty == LLT::vector(4, 32)) {
 1007|    19|    Opc = AArch64::SSHLv4i32;
 1008|    19|    NegOpc = AArch64::NEGv4i32;
 1009|    19|    RC = &AArch64::FPR128RegClass;
 1010|    19|  } else if (Ty == LLT::vector(2, 32)) {
 1011|    11|    Opc = AArch64::SSHLv2i32;
 1012|    11|    NegOpc = AArch64::NEGv2i32;
 1013|    11|    RC = &AArch64::FPR64RegClass;
 1014|    11|  } else {
 1015|     0|    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
 1016|     0|    return false;
 1017|     0|  }
 1018|    30|
 1019|    30|  MachineIRBuilder MIB(I);
 1020|    30|  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
 1021|    30|  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
 1022|    30|  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
 1023|    30|  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
 1024|    30|  I.eraseFromParent();
 1025|    30|  return true;
 1026|    30|}
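The NEG + SSHL pair emitted above leans on SSHL's semantics: a negative shift amount shifts right. A scalar model of the per-lane identity (a sketch, under the assumption that >> on a signed int is an arithmetic shift, as it is on AArch64):

    #include <cstdint>

    static int32_t sshlLane(int32_t X, int8_t Amt) {
      return Amt >= 0 ? (int32_t)((uint32_t)X << Amt) : (X >> -Amt);
    }

    // Arithmetic shift right is SSHL by the negated amount, i.e. NEG + SSHL.
    static int32_t ashrLane(int32_t X, int8_t Amt) {
      return sshlLane(X, (int8_t)-Amt);
    }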
 1027|      |
 1028|      |bool AArch64InstructionSelector::selectVaStartAAPCS(
 1029|     0|    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 1030|     0|  return false;
 1031|     0|}
 1032|      |
 1033|      |bool AArch64InstructionSelector::selectVaStartDarwin(
 1034|    63|    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 1035|    63|  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 1036|    63|  Register ListReg = I.getOperand(0).getReg();
 1037|    63|
 1038|    63|  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 1039|    63|
 1040|    63|  auto MIB =
 1041|    63|      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
 1042|    63|          .addDef(ArgsAddrReg)
 1043|    63|          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
 1044|    63|          .addImm(0)
 1045|    63|          .addImm(0);
 1046|    63|
 1047|    63|  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 1048|    63|
 1049|    63|  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
 1050|    63|            .addUse(ArgsAddrReg)
 1051|    63|            .addUse(ListReg)
 1052|    63|            .addImm(0)
 1053|    63|            .addMemOperand(*I.memoperands_begin());
 1054|    63|
 1055|    63|  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 1056|    63|  I.eraseFromParent();
 1057|    63|  return true;
 1058|    63|}
 1059|      |
 1060|      |void AArch64InstructionSelector::materializeLargeCMVal(
 1061|     4|    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
 1062|     4|  MachineBasicBlock &MBB = *I.getParent();
 1063|     4|  MachineFunction &MF = *MBB.getParent();
 1064|     4|  MachineRegisterInfo &MRI = MF.getRegInfo();
 1065|     4|  MachineIRBuilder MIB(I);
 1066|     4|
 1067|     4|  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
 1068|     4|  MovZ->addOperand(MF, I.getOperand(1));
 1069|     4|  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
 1070|     4|                                     AArch64II::MO_NC);
 1071|     4|  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
 1072|     4|  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
 1073|     4|
 1074|     4|  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
 1075|    12|                       Register ForceDstReg) {
 1076|    12|    Register DstReg = ForceDstReg
 1077|    12|                          ? ForceDstReg
 1078|    12|                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 1079|    12|    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
 1080|    12|    if (auto *GV = dyn_cast<GlobalValue>(V)) {
 1081|     9|      MovI->addOperand(MF, MachineOperand::CreateGA(
 1082|     9|                               GV, MovZ->getOperand(1).getOffset(), Flags));
 1083|     9|    } else {
 1084|     3|      MovI->addOperand(
 1085|     3|          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
 1086|     3|                                       MovZ->getOperand(1).getOffset(), Flags));
 1087|     3|    }
 1088|    12|    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
 1089|    12|    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
 1090|    12|    return DstReg;
 1091|    12|  };
 1092|     4|  Register DstReg = BuildMovK(MovZ.getReg(0),
 1093|     4|                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
 1094|     4|  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
 1095|     4|  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
 1096|     4|  return;
 1097|     4|}
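The sequence built above is one MOVZ plus three MOVKs, whose MO_G0..MO_G3 target flags select which 16-bit slice of the symbol's address each instruction installs (the Offset arguments 16/32/48 are the bit positions). A sketch of the slicing (hypothetical helper, not from this file):

    #include <cstdint>

    // The 16-bit chunk a MOVZ/MOVK at bit offset 0, 16, 32 or 48 would carry.
    static uint16_t movChunk(uint64_t Addr, unsigned BitOffset) {
      return (uint16_t)(Addr >> BitOffset);
    }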
 1098|      |
 1099| 6.02M|void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
 1100| 6.02M|  MachineBasicBlock &MBB = *I.getParent();
 1101| 6.02M|  MachineFunction &MF = *MBB.getParent();
 1102| 6.02M|  MachineRegisterInfo &MRI = MF.getRegInfo();
 1103| 6.02M|
 1104| 6.02M|  switch (I.getOpcode()) {
 1105| 6.02M|  case TargetOpcode::G_SHL:
 1106|  109k|  case TargetOpcode::G_ASHR:
 1107|  109k|  case TargetOpcode::G_LSHR: {
 1108|  109k|    // These shifts are legalized to have 64 bit shift amounts because we want
 1109|  109k|    // to take advantage of the existing imported selection patterns that assume
 1110|  109k|    // the immediates are s64s. However, if the shifted type is 32 bits and for
 1111|  109k|    // some reason we receive input GMIR that has an s64 shift amount that's not
 1112|  109k|    // a G_CONSTANT, insert a truncate so that we can still select the s32
 1113|  109k|    // register-register variant.
 1114|  109k|    unsigned SrcReg = I.getOperand(1).getReg();
 1115|  109k|    unsigned ShiftReg = I.getOperand(2).getReg();
 1116|  109k|    const LLT ShiftTy = MRI.getType(ShiftReg);
 1117|  109k|    const LLT SrcTy = MRI.getType(SrcReg);
 1118|  109k|    if (SrcTy.isVector())
 1119|   148|      return;
 1120|  109k|    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
 1121|  109k|    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
 1122| 91.4k|      return;
 1123| 18.0k|    auto *AmtMI = MRI.getVRegDef(ShiftReg);
 1124| 18.0k|    assert(AmtMI && "could not find a vreg definition for shift amount");
 1125| 18.0k|    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
 1126| 1.82k|      // Insert a subregister copy to implement a 64->32 trunc
 1127| 1.82k|      MachineIRBuilder MIB(I);
 1128| 1.82k|      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
 1129| 1.82k|                       .addReg(ShiftReg, 0, AArch64::sub_32);
 1130| 1.82k|      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
 1131| 1.82k|      I.getOperand(2).setReg(Trunc.getReg(0));
 1132| 1.82k|    }
 1133| 18.0k|    return;
 1134| 18.0k|  }
 1135|  664k|  case TargetOpcode::G_STORE:
 1136|  664k|    contractCrossBankCopyIntoStore(I, MRI);
 1137|  664k|    return;
 1138| 5.24M|  default:
 1139| 5.24M|    return;
 1140| 6.02M|  }
 1141| 6.02M|}
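The truncate inserted above is safe because a well-defined 32-bit shift only ever reads a shift amount below 32, so dropping the high half of the s64 amount loses nothing. A scalar sketch of that assumption:

    #include <cstdint>

    // Assumes Amt64 < 32, as any well-defined 32-bit shift requires; then
    // truncating the 64-bit amount cannot change the result.
    static uint32_t shl32(uint32_t X, uint64_t Amt64) {
      return X << (uint32_t)Amt64;
    }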
 1142|      |
 1143|      |bool AArch64InstructionSelector::earlySelectSHL(
 1144| 47.5k|    MachineInstr &I, MachineRegisterInfo &MRI) const {
 1145| 47.5k|  // We try to match the immediate variant of LSL, which is actually an alias
 1146| 47.5k|  // for a special case of UBFM. Otherwise, we fall back to the imported
 1147| 47.5k|  // selector which will match the register variant.
 1148| 47.5k|  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
 1149| 47.5k|  const auto &MO = I.getOperand(2);
 1150| 47.5k|  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
 1151| 47.5k|  if (!VRegAndVal)
 1152| 3.96k|    return false;
 1153| 43.6k|
 1154| 43.6k|  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
 1155| 43.6k|  if (DstTy.isVector())
 1156|     0|    return false;
 1157| 43.6k|  bool Is64Bit = DstTy.getSizeInBits() == 64;
 1158| 43.6k|  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
 1159| 43.6k|  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
 1160| 43.6k|  MachineIRBuilder MIB(I);
 1161| 43.6k|
 1162| 43.6k|  if (!Imm1Fn || !Imm2Fn)
 1163|     0|    return false;
 1164| 43.6k|
 1165| 43.6k|  auto NewI =
 1166| 43.6k|      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
 1167| 43.6k|                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
 1168| 43.6k|
 1169| 43.6k|  for (auto &RenderFn : *Imm1Fn)
 1170| 43.6k|    RenderFn(NewI);
 1171| 43.6k|  for (auto &RenderFn : *Imm2Fn)
 1172| 43.6k|    RenderFn(NewI);
 1173| 43.6k|
 1174| 43.6k|  I.eraseFromParent();
 1175| 43.6k|  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
 1176| 43.6k|}
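The immediates rendered by selectShiftA_*/selectShiftB_* are the UBFM operands of the LSL alias; for the 32-bit case, "LSL Wd, Wn, #s" is "UBFM Wd, Wn, #((32 - s) % 32), #(31 - s)". A sketch of that transform (my reading of the i32shift_a/i32shift_b equivalents the comment above names):

    #include <cstdint>

    static void lslImmAsUBFM32(uint64_t Shift, uint64_t &ImmR, uint64_t &ImmS) {
      ImmR = (32 - Shift) % 32; // rotate-right amount
      ImmS = 31 - Shift;        // index of the last bit copied
    }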
1177
1178
void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1179
664k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
1180
664k
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1181
664k
  // If we're storing a scalar, it doesn't matter what register bank that
1182
664k
  // scalar is on. All that matters is the size.
1183
664k
  //
1184
664k
  // So, if we see something like this (with a 32-bit scalar as an example):
1185
664k
  //
1186
664k
  // %x:gpr(s32) = ... something ...
1187
664k
  // %y:fpr(s32) = COPY %x:gpr(s32)
1188
664k
  // G_STORE %y:fpr(s32)
1189
664k
  //
1190
664k
  // We can fix this up into something like this:
1191
664k
  //
1192
664k
  // G_STORE %x:gpr(s32)
1193
664k
  //
1194
664k
  // And then continue the selection process normally.
1195
664k
  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1196
664k
  if (!Def)
1197
0
    return;
1198
664k
  Register DefDstReg = Def->getOperand(0).getReg();
1199
664k
  LLT DefDstTy = MRI.getType(DefDstReg);
1200
664k
  Register StoreSrcReg = I.getOperand(0).getReg();
1201
664k
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1202
664k
1203
664k
  // If we get something strange like a physical register, then we shouldn't
1204
664k
  // go any further.
1205
664k
  if (!DefDstTy.isValid())
1206
0
    return;
1207
664k
1208
664k
  // Are the source and dst types the same size?
1209
664k
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1210
0
    return;
1211
664k
1212
664k
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1213
664k
      RBI.getRegBank(DefDstReg, MRI, TRI))
1214
662k
    return;
1215
1.72k
1216
1.72k
  // We have a cross-bank copy, which is entering a store. Let's fold it.
1217
1.72k
  I.getOperand(0).setReg(DefDstReg);
1218
1.72k
}

bool AArch64InstructionSelector::earlySelectLoad(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Try to fold in shifts, etc into the addressing mode of a load.
  assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");

  // Don't handle atomic loads/stores yet.
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
    LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
    return false;
  }

  unsigned MemBytes = MemOp.getSize();

  // Only support 64-bit loads for now.
  if (MemBytes != 8)
    return false;

  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  // Don't handle vectors.
  if (DstTy.isVector())
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  // TODO: 32-bit destinations.
  if (DstSize != 64)
    return false;

  // Check if we can do any folding from GEPs/shifts etc. into the load.
  auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
  if (!ImmFn)
    return false;

  // We can fold something. Emit the load here.
  MachineIRBuilder MIB(I);

  // Choose the instruction based off the size of the element being loaded, and
  // whether or not we're loading into a FPR.
  const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
  unsigned Opc =
      RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
  // Construct the load.
  auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
  for (auto &RenderFn : *ImmFn)
    RenderFn(LoadMI);
  LoadMI.addMemOperand(*I.memoperands_begin());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
}
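
// Illustrative sketch (not in the original source): the XRO ("register
// offset") addressing mode folds a scaled index into the load, so a pattern
// like
//   %offset:gpr(s64) = G_SHL %idx, %cst(s64) ; %cst = G_CONSTANT i64 3
//   %addr:gpr(p0)    = G_GEP %base, %offset
//   %val:gpr(s64)    = G_LOAD %addr
// can collapse into a single register-offset load, roughly
//   ldr x0, [x1, x2, lsl #3]                 ; LDRXroX
// where the shift amount matches the 8-byte access size.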

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_LOAD:
    return earlySelectLoad(I, MRI);
  default:
    return false;
  }
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
        MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC
        = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }


  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  preISelLower(I);

  // There may be patterns that the importer can't handle optimally, but
  // still selects to a suboptimal sequence, so our custom C++ selection code
  // later never gets a chance to work on them. Therefore, we have an early
  // selection attempt here to give priority to certain selection routines
  // over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const Register CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }
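
  // Illustrative sketch (not in the original source): when the compare-branch
  // fold above fails, a boolean branch such as
  //   G_BRCOND %cond(s1), %bb.1
  // is lowered to a test-bit-and-branch on bit 0, roughly
  //   TBNZW %cond, 0, %bb.1
  // which branches when the low bit of %cond is set, without touching NZCV.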

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    // We allow G_CONSTANT of types < 32b.
    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;

    if (isFP) {
      // Either emit a FMOV, or emit a copy to emit a normal mov.
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      // Can we use a FMOV instruction to represent the immediate?
      if (emitFMovForFConstant(I, MRI))
        return true;

      // Nope. Emit a copy and use a normal mov instead.
      const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    I.setDesc(TII.get(MovOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
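
  // Illustrative sketch (not in the original source): a value like 1.25 fits
  // the 8-bit FP immediate encoding and is handled by emitFMovForFConstant,
  // whereas G_FCONSTANT float 1.1 is not exactly encodable and takes the
  // fallback path above, roughly
  //   %tmp:gpr32 = MOVi32imm 1066192077      ; bit pattern of 1.1f
  //   %def:fpr32 = COPY %tmp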
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();

    if (SrcTy.getSizeInBits() > 64) {
      // This should be an extract of an s128, which is like a vector extract.
      if (SrcTy.getSizeInBits() != 128)
        return false;
      // Only support extracting 64 bits from an s128 at the moment.
      if (DstTy.getSizeInBits() != 64)
        return false;

      const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
      const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
      // Check we have the right regbank always.
      assert(SrcRB.getID() == AArch64::FPRRegBankID &&
             DstRB.getID() == AArch64::FPRRegBankID &&
             "Wrong extract regbank!");
      (void)SrcRB;

      // Emit the same code as a vector extract.
      // Offset must be a multiple of 64.
      unsigned Offset = I.getOperand(2).getImm();
      if (Offset % 64 != 0)
        return false;
      unsigned LaneIdx = Offset / 64;
      MachineIRBuilder MIB(I);
      MachineInstr *Extract = emitExtractVectorElt(
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      if (!Extract)
        return false;
      I.eraseFromParent();
      return true;
    }

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
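
  // Illustrative sketch (not in the original source): extracting 16 bits at
  // bit offset 8 from an s32 source becomes
  //   %dst = UBFMWri %src, 8, 23
  // i.e. immr is the offset and the immediate appended above is
  // offset + width - 1 (the UBFX form of UBFM).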

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
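
  // Illustrative sketch (not in the original source): inserting an s16 value
  // at bit 8 of an s32 destination becomes roughly
  //   %dst = BFMWri %old, %val, 24, 15
  // since immr is rotated to (32 - 8) % 32 = 24 and imms is width - 1 = 15,
  // matching the BFI alias BFI %dst, %val, #8, #16.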
  case TargetOpcode::G_FRAME_INDEX: {
    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
    if (Ty != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }
    I.setDesc(TII.get(AArch64::ADDXri));

    // MOs for a #0 shifted immediate.
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal()) {
      // FIXME: we don't support TLS yet.
      return false;
    }
    unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large) {
      // Materialize the global using movz/movk instructions.
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
      MachineInstrBuilder MIB(MF, I);
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    MachineIRBuilder MIB(I);

    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());

    if (PtrTy != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }

    auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }
    unsigned MemSizeInBits = MemOp.getSize() * 8;

    const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Sanity-check the pointer register.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const Register ValReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    const unsigned NewOpc =
        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));

    uint64_t Offset = 0;
    auto *PtrMI = MRI.getVRegDef(PtrReg);

    // Try to fold a GEP into our unsigned immediate addressing mode.
    if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
        int64_t Imm = *COff;
        const unsigned Size = MemSizeInBits / 8;
        const unsigned Scale = Log2_32(Size);
        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
          unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
          I.getOperand(1).setReg(Ptr2Reg);
          PtrMI = MRI.getVRegDef(Ptr2Reg);
          Offset = Imm / Size;
        }
      }
    }

    // If we haven't folded anything into our addressing mode yet, try to fold
    // a frame index into the base+offset.
    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());

    I.addOperand(MachineOperand::CreateImm(Offset));

    // If we're storing a 0, use WZR/XZR.
    if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
      if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
        if (I.getOpcode() == AArch64::STRWui)
          I.getOperand(0).setReg(AArch64::WZR);
        else if (I.getOpcode() == AArch64::STRXui)
          I.getOperand(0).setReg(AArch64::XZR);
      }
    }

    if (IsZExtLoad) {
      // The zextload from a smaller type to i32 should be handled by the
      // importer.
      if (MRI.getType(ValReg).getSizeInBits() != 64)
        return false;
      // If we have a ZEXTLOAD then change the load's type to be a narrower reg
      // and zero_extend with SUBREG_TO_REG.
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      Register DstReg = I.getOperand(0).getReg();
      I.getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
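
  // Illustrative sketch (not in the original source): for
  //   %addr:gpr(p0) = G_GEP %base, %cst(s64)   ; %cst = G_CONSTANT i64 16
  //   G_STORE %val(s64), %addr
  // the offset is 8-byte aligned and in range, so the G_GEP folds away and
  // the store becomes roughly
  //   STRXui %val, %base, 2
  // where the immediate is the byte offset scaled by the access size (16/8).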

  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
      return false;
    }

    if (Ty != LLT::scalar(64)) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
                        << ", expected: " << LLT::scalar(64) << '\n');
      return false;
    }

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
                                                             : AArch64::UMULHrr;
    I.setDesc(TII.get(NewOpc));

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:

  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorASHR(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_OR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_GEP: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_UADDO: {
    // TODO: Support other types.
    unsigned OpSize = Ty.getSizeInBits();
    if (OpSize != 32 && OpSize != 64) {
      LLVM_DEBUG(
          dbgs()
          << "G_UADDO currently only supported for 32 and 64 b types.\n");
      return false;
    }

    // TODO: Support vectors.
    if (Ty.isVector()) {
      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
      return false;
    }

    // Add, and set the condition flags.
    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
    MachineIRBuilder MIRBuilder(I);
    auto AddsMI = MIRBuilder.buildInstr(
        AddsOpc, {I.getOperand(0).getReg()},
        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);

    // Now, put the overflow result in the register given by the first operand
    // to the G_UADDO. CSINC increments the result when the predicate is false,
    // so to get the increment when it's true, we need to use the inverse. In
    // this case, we want to increment when carry is set.
    auto CsetMI = MIRBuilder
                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
                      .addImm(getInvertedCondCode(AArch64CC::HS));
    constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }
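
  // Illustrative sketch (not in the original source): a 32-bit
  //   %res, %carry = G_UADDO %a, %b
  // is emitted as roughly
  //   %res   = ADDSWrr %a, %b                  ; sets NZCV
  //   %carry = CSINCWr wzr, wzr, <inverse of HS>
  // which is the CSET hs idiom: %carry becomes 1 exactly when the carry flag
  // is set by the addition.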

  case TargetOpcode::G_PTR_MASK: {
    uint64_t Align = I.getOperand(2).getImm();
    if (Align >= 64 || Align == 0)
      return false;

    uint64_t Mask = ~((1ULL << Align) - 1);
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
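
  // Illustrative sketch (not in the original source): G_PTR_MASK with an
  // alignment operand of 4 computes Mask = ~((1ULL << 4) - 1), clearing the
  // low four bits, and selects to roughly
  //   ANDXri %ptr, <logical-immediate encoding of 0xFFFFFFFFFFFFFFF0>
  // with the raw mask run through encodeLogicalImmediate above.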
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

    if (DstRB.getID() != SrcRB.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
      return false;
    }

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      const TargetRegisterClass *DstRC =
          getRegClassForTypeOnBank(DstTy, DstRB, RBI);
      if (!DstRC)
        return false;

      const TargetRegisterClass *SrcRC =
          getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
      if (!SrcRC)
        return false;

      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      if (DstRC == SrcRC) {
        // Nothing to be done
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 SrcTy == LLT::scalar(64)) {
        llvm_unreachable("TableGen can import this case");
        return false;
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
      } else {
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
        return false;
      }

      I.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        constrainSelectedInstRegOperands(I, TII, TRI, RBI);
        return true;
      }

      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
        MachineIRBuilder MIB(I);
        MachineInstr *Extract = emitExtractVectorElt(
            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
        if (!Extract)
          return false;
        I.eraseFromParent();
        return true;
      }
    }

    return false;
  }

  case TargetOpcode::G_ANYEXT: {
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();

    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
    if (RBDst.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                        << ", expected: GPR\n");
      return false;
    }

    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

    if (DstSize == 0) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    }

    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value if any.
    if (DstSize > 32) {
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(ExtSrc)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
    }
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT: {
    unsigned Opcode = I.getOpcode();
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const bool isSigned = Opcode == TargetOpcode::G_SEXT;
    const Register DefReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
                        << ", expected: GPR\n");
      return false;
    }

    MachineInstr *ExtI;
    if (DstTy == LLT::scalar(64)) {
      // FIXME: Can we avoid manually doing this?
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
                          << " operand\n");
        return false;
      }

      const Register SrcXReg =
          MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(SrcXReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);

      const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
      ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
                 .addDef(DefReg)
                 .addUse(SrcXReg)
                 .addImm(0)
                 .addImm(SrcTy.getSizeInBits() - 1);
    } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
      const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
      ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
                 .addDef(DefReg)
                 .addUse(SrcReg)
                 .addImm(0)
                 .addImm(SrcTy.getSizeInBits() - 1);
    } else {
      return false;
    }

    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
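
  // Illustrative sketch (not in the original source): a G_SEXT from s8 to
  // s32 becomes SBFMWri %src, 0, 7 (the SXTB alias), while an s16 -> s64
  // extend first forms the 64-bit value with SUBREG_TO_REG and then emits
  // SBFMXri %srcx, 0, 15 (SXTH); G_ZEXT uses the UBFM forms instead.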

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    return true;
  }

  case TargetOpcode::G_INTTOPTR:
    // The importer is currently unable to import pointer types since they
    // didn't exist in SelectionDAG.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
    // but we might not run an optimizer that deletes them. The other exception
    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
    // of them.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_SELECT: {
    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
    }

    const Register CondReg = I.getOperand(1).getReg();
    const Register TReg = I.getOperand(2).getReg();
    const Register FReg = I.getOperand(3).getReg();

    if (tryOptSelect(I))
      return true;

    Register CSelOpc = selectSelectOpc(I, MRI, RBI);
    MachineInstr &TstMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
             .addDef(AArch64::WZR)
             .addUse(CondReg)
             .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
                                .addDef(I.getOperand(0).getReg())
                                .addUse(TReg)
                                .addUse(FReg)
                                .addImm(AArch64CC::NE);

    constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_ICMP: {
    if (Ty.isVector())
      return selectVectorICmp(I, MRI);

    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    MachineIRBuilder MIRBuilder(I);
    if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
                            MIRBuilder))
      return false;
    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
                    MIRBuilder);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_FCMP: {
    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    unsigned CmpOpc = selectFCMPOpc(I, MRI);
    if (!CmpOpc)
      return false;

    // FIXME: regbank

    AArch64CC::CondCode CC1, CC2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);

    // Partially build the compare. Decide if we need to add a use for the
    // third operand based off whether or not we're comparing against 0.0.
    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
                     .addUse(I.getOperand(2).getReg());

    // If we don't have an immediate compare, then we need to add a use of the
    // register which wasn't used for the immediate.
    // Note that the immediate will always be the last operand.
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());

    const Register DefReg = I.getOperand(0).getReg();
    Register Def1Reg = DefReg;
    if (CC2 != AArch64CC::AL)
      Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

    MachineInstr &CSetMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
             .addDef(Def1Reg)
             .addUse(AArch64::WZR)
             .addUse(AArch64::WZR)
             .addImm(getInvertedCondCode(CC1));

    if (CC2 != AArch64CC::AL) {
      Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      MachineInstr &CSet2MI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
               .addDef(Def2Reg)
               .addUse(AArch64::WZR)
               .addUse(AArch64::WZR)
               .addImm(getInvertedCondCode(CC2));
      MachineInstr &OrMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
               .addDef(DefReg)
               .addUse(Def1Reg)
               .addUse(Def2Reg);
      constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
    }
    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
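
  // Illustrative sketch (not in the original source): a predicate like
  // "one" (ordered and not equal) maps to two condition codes, so the code
  // above would emit roughly
  //   FCMPSrr %lhs, %rhs
  //   %t0  = CSINCWr wzr, wzr, <inverse of first CC>
  //   %t1  = CSINCWr wzr, wzr, <inverse of second CC>
  //   %dst = ORRWrr %t0, %t1
  // while single-condition predicates skip the second CSINC and the ORR.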
  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const TargetRegisterClass *DstRC =
        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    if (TM.getCodeModel() == CodeModel::Large) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    } else {
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       .addBlockAddress(I.getOperand(1).getBlockAddress(),
                                        /* Offset */ 0, AArch64II::MO_PAGE)
                       .addBlockAddress(
                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,
                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
    }
  }
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  }

  return false;
}

bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();
  MachineIRBuilder MIB(I);

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
                 {JTAddr, Index})
      .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
  MachineIRBuilder MIB(I);
  auto MovMI =
    MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
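
// Illustrative sketch (not in the original source): the MOVaddrJT built above
// is a pseudo that later expands to the usual page/pageoff pair, roughly
//   adrp x8, .LJTI0_0
//   add  x8, x8, :lo12:.LJTI0_0
// (label name hypothetical), giving the jump table base that selectBrJT's
// JumpTableDest32 consumes.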

bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
2426
2427
bool AArch64InstructionSelector::selectVectorICmp(
2428
94
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2429
94
  Register DstReg = I.getOperand(0).getReg();
2430
94
  LLT DstTy = MRI.getType(DstReg);
2431
94
  Register SrcReg = I.getOperand(2).getReg();
2432
94
  Register Src2Reg = I.getOperand(3).getReg();
2433
94
  LLT SrcTy = MRI.getType(SrcReg);
2434
94
2435
94
  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2436
94
  unsigned NumElts = DstTy.getNumElements();
2437
94
2438
94
  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2439
94
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2440
94
  // Third index is cc opcode:
2441
94
  // 0 == eq
2442
94
  // 1 == ugt
2443
94
  // 2 == uge
2444
94
  // 3 == ult
2445
94
  // 4 == ule
2446
94
  // 5 == sgt
2447
94
  // 6 == sge
2448
94
  // 7 == slt
2449
94
  // 8 == sle
2450
94
  // ne is done by negating 'eq' result.
2451
94
2452
94
  // This table below assumes that for some comparisons the operands will be
2453
94
  // commuted.
2454
94
  // ult op == commute + ugt op
2455
94
  // ule op == commute + uge op
2456
94
  // slt op == commute + sgt op
2457
94
  // sle op == commute + sge op
2458
94
  unsigned PredIdx = 0;
2459
94
  bool SwapOperands = false;
2460
94
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2461
94
  switch (Pred) {
2462
94
  case CmpInst::ICMP_NE:
2463
24
  case CmpInst::ICMP_EQ:
2464
24
    PredIdx = 0;
2465
24
    break;
2466
24
  case CmpInst::ICMP_UGT:
2467
8
    PredIdx = 1;
2468
8
    break;
2469
24
  case CmpInst::ICMP_UGE:
2470
8
    PredIdx = 2;
2471
8
    break;
2472
24
  case CmpInst::ICMP_ULT:
2473
8
    PredIdx = 3;
2474
8
    SwapOperands = true;
2475
8
    break;
2476
24
  case CmpInst::ICMP_ULE:
2477
8
    PredIdx = 4;
2478
8
    SwapOperands = true;
2479
8
    break;
2480
24
  case CmpInst::ICMP_SGT:
2481
8
    PredIdx = 5;
2482
8
    break;
2483
24
  case CmpInst::ICMP_SGE:
2484
14
    PredIdx = 6;
2485
14
    break;
2486
24
  case CmpInst::ICMP_SLT:
2487
8
    PredIdx = 7;
2488
8
    SwapOperands = true;
2489
8
    break;
2490
24
  case CmpInst::ICMP_SLE:
2491
8
    PredIdx = 8;
2492
8
    SwapOperands = true;
2493
8
    break;
2494
24
  default:
2495
0
    llvm_unreachable("Unhandled icmp predicate");
2496
24
    
return false0
;
2497
94
  }
2498
94
2499
94
  // This table obviously should be tablegen'd when we have our GISel native
2500
94
  // tablegen selector.
2501
94
2502
94
  static const unsigned OpcTable[4][4][9] = {
2503
94
      {
2504
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2505
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2506
94
           0 /* invalid */},
2507
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2508
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2509
94
           0 /* invalid */},
2510
94
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2511
94
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2512
94
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2513
94
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2514
94
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2515
94
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2516
94
      },
2517
94
      {
2518
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2519
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2520
94
           0 /* invalid */},
2521
94
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2522
94
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2523
94
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2524
94
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2525
94
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2526
94
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2527
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2528
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2529
94
           0 /* invalid */}
2530
94
      },
2531
94
      {
2532
94
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2533
94
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2534
94
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2535
94
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2536
94
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2537
94
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2538
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2539
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2540
94
           0 /* invalid */},
2541
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2542
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2543
94
           0 /* invalid */}
2544
94
      },
2545
94
      {
2546
94
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2547
94
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2548
94
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2549
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2550
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2551
94
           0 /* invalid */},
2552
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2553
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2554
94
           0 /* invalid */},
2555
94
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2556
94
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2557
94
           0 /* invalid */}
2558
94
      },
2559
94
  };
2560
94
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2561
94
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2562
94
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
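// As a sanity check on the indexing above, a minimal standalone sketch (plain
// C++, no LLVM dependencies; log2u32 stands in for llvm::Log2_32):
#include <cassert>
#include <cstdint>

static unsigned log2u32(uint32_t V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

int main() {
  // A v4i32 compare: 32-bit elements, 4 lanes.
  unsigned SrcEltSize = 32, NumElts = 4;
  unsigned EltIdx = log2u32(SrcEltSize / 8);  // 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3
  unsigned NumEltsIdx = log2u32(NumElts / 2); // v2 -> 0, v4 -> 1, v8 -> 2, v16 -> 3
  assert(EltIdx == 2 && NumEltsIdx == 1);     // lands on the CM*v4i32 row
  return 0;
}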
2563
94
  if (!Opc) {
2564
0
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2565
0
    return false;
2566
0
  }
2567
94
2568
94
  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2569
94
  const TargetRegisterClass *SrcRC =
2570
94
      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2571
94
  if (!SrcRC) {
2572
0
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2573
0
    return false;
2574
0
  }
2575
94
2576
94
  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2577
94
  if (SrcTy.getSizeInBits() == 128)
2578
54
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2579
94
2580
94
  if (SwapOperands)
2581
32
    std::swap(SrcReg, Src2Reg);
2582
94
2583
94
  MachineIRBuilder MIB(I);
2584
94
  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2585
94
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2586
94
2587
94
  // Invert if we had a 'ne' cc.
2588
94
  if (NotOpc) {
2589
10
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2590
10
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2591
84
  } else {
2592
84
    MIB.buildCopy(DstReg, Cmp.getReg(0));
2593
84
  }
2594
94
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2595
94
  I.eraseFromParent();
2596
94
  return true;
2597
94
}
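// A standalone check (not LLVM code) of the two identities the selection
// above relies on: the swapped-operand predicates, and 'ne' as the negated
// 'eq' (the CMEQ + NOT sequence):
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t A = -2; A <= 2; ++A) {
    for (int64_t B = -2; B <= 2; ++B) {
      assert((A < B) == (B > A));    // slt == commute + sgt
      assert((A <= B) == (B >= A));  // sle == commute + sge
      assert((A != B) == !(A == B)); // ne == negated eq
      uint64_t UA = (uint64_t)A, UB = (uint64_t)B;
      assert((UA < UB) == (UB > UA));   // ult == commute + ugt
      assert((UA <= UB) == (UB >= UA)); // ule == commute + uge
    }
  }
  return 0;
}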
2598
2599
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2600
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2601
39.9k
    MachineIRBuilder &MIRBuilder) const {
2602
39.9k
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2603
39.9k
2604
39.9k
  auto BuildFn = [&](unsigned SubregIndex) {
2605
39.9k
    auto Ins =
2606
39.9k
        MIRBuilder
2607
39.9k
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2608
39.9k
            .addImm(SubregIndex);
2609
39.9k
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2610
39.9k
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2611
39.9k
    return &*Ins;
2612
39.9k
  };
2613
39.9k
2614
39.9k
  switch (EltSize) {
2615
39.9k
  case 16:
2616
2.30k
    return BuildFn(AArch64::hsub);
2617
39.9k
  case 32:
2618
24.3k
    return BuildFn(AArch64::ssub);
2619
39.9k
  case 64:
2620
13.2k
    return BuildFn(AArch64::dsub);
2621
39.9k
  default:
2622
0
    return nullptr;
2623
39.9k
  }
2624
39.9k
}
2625
2626
bool AArch64InstructionSelector::selectMergeValues(
2627
1
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2628
1
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2629
1
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2630
1
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2631
1
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2632
1
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2633
1
2634
1
  if (I.getNumOperands() != 3)
2635
0
    return false;
2636
1
2637
1
  // Merging 2 s64s into an s128.
2638
1
  if (DstTy == LLT::scalar(128)) {
2639
0
    if (SrcTy.getSizeInBits() != 64)
2640
0
      return false;
2641
0
    MachineIRBuilder MIB(I);
2642
0
    Register DstReg = I.getOperand(0).getReg();
2643
0
    Register Src1Reg = I.getOperand(1).getReg();
2644
0
    Register Src2Reg = I.getOperand(2).getReg();
2645
0
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2646
0
    MachineInstr *InsMI =
2647
0
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2648
0
    if (!InsMI)
2649
0
      return false;
2650
0
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2651
0
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
2652
0
    if (!Ins2MI)
2653
0
      return false;
2654
0
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2655
0
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2656
0
    I.eraseFromParent();
2657
0
    return true;
2658
0
  }
2659
1
2660
1
  if (RB.getID() != AArch64::GPRRegBankID)
2661
0
    return false;
2662
1
2663
1
  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2664
0
    return false;
2665
1
2666
1
  auto *DstRC = &AArch64::GPR64RegClass;
2667
1
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2668
1
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2669
1
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
2670
1
                                .addDef(SubToRegDef)
2671
1
                                .addImm(0)
2672
1
                                .addUse(I.getOperand(1).getReg())
2673
1
                                .addImm(AArch64::sub_32);
2674
1
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2675
1
  // Need to anyext the second scalar before we can use bfm
2676
1
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2677
1
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
2678
1
                                .addDef(SubToRegDef2)
2679
1
                                .addImm(0)
2680
1
                                .addUse(I.getOperand(2).getReg())
2681
1
                                .addImm(AArch64::sub_32);
2682
1
  MachineInstr &BFM =
2683
1
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2684
1
           .addDef(I.getOperand(0).getReg())
2685
1
           .addUse(SubToRegDef)
2686
1
           .addUse(SubToRegDef2)
2687
1
           .addImm(32)
2688
1
           .addImm(31);
2689
1
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2690
1
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2691
1
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2692
1
  I.eraseFromParent();
2693
1
  return true;
2694
1
}
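// Illustrative arithmetic (a sketch, not LLVM code) for the BFM above:
// BFMXri with immr=32, imms=31 inserts the low 32 bits of the second source
// into bits [63:32] of the first, which is exactly G_MERGE_VALUES of two s32s
// into an s64:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Lo = 0x11223344, Hi = 0xAABBCCDD;
  uint64_t Merged = (uint64_t)Lo | ((uint64_t)Hi << 32); // what the BFM computes
  assert(Merged == 0xAABBCCDD11223344ULL);
  return 0;
}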
2695
2696
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2697
1.38k
                              const unsigned EltSize) {
2698
1.38k
  // Choose a lane copy opcode and subregister based off of the size of the
2699
1.38k
  // vector's elements.
2700
1.38k
  switch (EltSize) {
2701
1.38k
  case 16:
2702
97
    CopyOpc = AArch64::CPYi16;
2703
97
    ExtractSubReg = AArch64::hsub;
2704
97
    break;
2705
1.38k
  case 32:
2706
1.25k
    CopyOpc = AArch64::CPYi32;
2707
1.25k
    ExtractSubReg = AArch64::ssub;
2708
1.25k
    break;
2709
1.38k
  case 64:
2710
40
    CopyOpc = AArch64::CPYi64;
2711
40
    ExtractSubReg = AArch64::dsub;
2712
40
    break;
2713
1.38k
  default:
2714
0
    // Unknown size, bail out.
2715
0
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2716
0
    return false;
2717
1.38k
  }
2718
1.38k
  return true;
2719
1.38k
}
2720
2721
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2722
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2723
1.24k
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2724
1.24k
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2725
1.24k
  unsigned CopyOpc = 0;
2726
1.24k
  unsigned ExtractSubReg = 0;
2727
1.24k
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2728
0
    LLVM_DEBUG(
2729
0
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2730
0
    return nullptr;
2731
0
  }
2732
1.24k
2733
1.24k
  const TargetRegisterClass *DstRC =
2734
1.24k
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2735
1.24k
  if (!DstRC) {
2736
0
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2737
0
    return nullptr;
2738
0
  }
2739
1.24k
2740
1.24k
  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2741
1.24k
  const LLT &VecTy = MRI.getType(VecReg);
2742
1.24k
  const TargetRegisterClass *VecRC =
2743
1.24k
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2744
1.24k
  if (!VecRC) {
2745
0
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2746
0
    return nullptr;
2747
0
  }
2748
1.24k
2749
1.24k
  // The register that we're going to copy into.
2750
1.24k
  Register InsertReg = VecReg;
2751
1.24k
  if (!DstReg)
2752
0
    DstReg = MRI.createVirtualRegister(DstRC);
2753
1.24k
  // If the lane index is 0, we just use a subregister COPY.
2754
1.24k
  if (LaneIdx == 0) {
2755
647
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2756
647
                    .addReg(VecReg, 0, ExtractSubReg);
2757
647
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2758
647
    return &*Copy;
2759
647
  }
2760
593
2761
593
  // Lane copies require 128-bit wide registers. If we're dealing with an
2762
593
  // unpacked vector, then we need to move up to that width. Insert an implicit
2763
593
  // def and a subregister insert to get us there.
2764
593
  if (VecTy.getSizeInBits() != 128) {
2765
586
    MachineInstr *ScalarToVector = emitScalarToVector(
2766
586
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2767
586
    if (!ScalarToVector)
2768
0
      return nullptr;
2769
586
    InsertReg = ScalarToVector->getOperand(0).getReg();
2770
586
  }
2771
593
2772
593
  MachineInstr *LaneCopyMI =
2773
593
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2774
593
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2775
593
2776
593
  // Make sure that we actually constrain the initial copy.
2777
593
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2778
593
  return LaneCopyMI;
2779
593
}
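// A rough model (assuming little-endian lane numbering; not LLVM code) of why
// lane 0 is special in emitExtractVectorElt above: lane 0 overlaps the scalar
// subregister exactly, so a plain subregister COPY suffices, while any other
// lane needs an indexed CPYi* lane copy:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t V4i32[4] = {7, 8, 9, 10}; // a v4i32 value in a 128-bit register
  uint32_t SSub;
  std::memcpy(&SSub, &V4i32[0], sizeof(SSub)); // the "ssub" view is lane 0
  assert(SSub == 7);
  assert(V4i32[2] == 9); // lanes 1..3 need an explicit lane index
  return 0;
}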
2780
2781
bool AArch64InstructionSelector::selectExtractElt(
2782
1.23k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2783
1.23k
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2784
1.23k
         "unexpected opcode!");
2785
1.23k
  Register DstReg = I.getOperand(0).getReg();
2786
1.23k
  const LLT NarrowTy = MRI.getType(DstReg);
2787
1.23k
  const Register SrcReg = I.getOperand(1).getReg();
2788
1.23k
  const LLT WideTy = MRI.getType(SrcReg);
2789
1.23k
  (void)WideTy;
2790
1.23k
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2791
1.23k
         "source register size too small!");
2792
1.23k
  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2793
1.23k
2794
1.23k
  // Need the lane index to determine the correct copy opcode.
2795
1.23k
  MachineOperand &LaneIdxOp = I.getOperand(2);
2796
1.23k
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2797
1.23k
2798
1.23k
  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2799
0
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2800
0
    return false;
2801
0
  }
2802
1.23k
2803
1.23k
  // Find the index to extract from.
2804
1.23k
  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2805
1.23k
  if (!VRegAndVal)
2806
0
    return false;
2807
1.23k
  unsigned LaneIdx = VRegAndVal->Value;
2808
1.23k
2809
1.23k
  MachineIRBuilder MIRBuilder(I);
2810
1.23k
2811
1.23k
  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2812
1.23k
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2813
1.23k
                                               LaneIdx, MIRBuilder);
2814
1.23k
  if (!Extract)
2815
0
    return false;
2816
1.23k
2817
1.23k
  I.eraseFromParent();
2818
1.23k
  return true;
2819
1.23k
}
2820
2821
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2822
2
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2823
2
  unsigned NumElts = I.getNumOperands() - 1;
2824
2
  Register SrcReg = I.getOperand(NumElts).getReg();
2825
2
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2826
2
  const LLT SrcTy = MRI.getType(SrcReg);
2827
2
2828
2
  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2829
2
  if (SrcTy.getSizeInBits() > 128) {
2830
0
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2831
0
    return false;
2832
0
  }
2833
2
2834
2
  MachineIRBuilder MIB(I);
2835
2
2836
2
  // We implement a split vector operation by treating the sub-vectors as
2837
2
  // scalars and extracting them.
2838
2
  const RegisterBank &DstRB =
2839
2
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2840
6
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2841
4
    Register Dst = I.getOperand(OpIdx).getReg();
2842
4
    MachineInstr *Extract =
2843
4
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2844
4
    if (!Extract)
2845
0
      return false;
2846
4
  }
2847
2
  I.eraseFromParent();
2848
2
  return true;
2849
2
}
2850
2851
bool AArch64InstructionSelector::selectUnmergeValues(
2852
149
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2853
149
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2854
149
         "unexpected opcode");
2855
149
2856
149
  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2857
149
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2858
149
          AArch64::FPRRegBankID ||
2859
149
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2860
149
          AArch64::FPRRegBankID) {
2861
0
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2862
0
                         "currently unsupported.\n");
2863
0
    return false;
2864
0
  }
2865
149
2866
149
  // The last operand is the vector source register, and every other operand is
2867
149
  // a register to unpack into.
2868
149
  unsigned NumElts = I.getNumOperands() - 1;
2869
149
  Register SrcReg = I.getOperand(NumElts).getReg();
2870
149
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2871
149
  const LLT WideTy = MRI.getType(SrcReg);
2872
149
  (void)WideTy;
2873
149
  assert(WideTy.isVector() && "can only unmerge from vector types!");
2874
149
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2875
149
         "source register size too small!");
2876
149
2877
149
  if (!NarrowTy.isScalar())
2878
2
    return selectSplitVectorUnmerge(I, MRI);
2879
147
2880
147
  MachineIRBuilder MIB(I);
2881
147
2882
147
  // Choose a lane copy opcode and subregister based off of the size of the
2883
147
  // vector's elements.
2884
147
  unsigned CopyOpc = 0;
2885
147
  unsigned ExtractSubReg = 0;
2886
147
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2887
0
    return false;
2888
147
2889
147
  // Set up for the lane copies.
2890
147
  MachineBasicBlock &MBB = *I.getParent();
2891
147
2892
147
  // Stores the registers we'll be copying from.
2893
147
  SmallVector<Register, 4> InsertRegs;
2894
147
2895
147
  // We'll use the first register twice, so we only need NumElts-1 registers.
2896
147
  unsigned NumInsertRegs = NumElts - 1;
2897
147
2898
147
  // If our elements fit into exactly 128 bits, then we can copy from the source
2899
147
  // directly. Otherwise, we need to do a bit of setup with some subregister
2900
147
  // inserts.
2901
147
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2902
91
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2903
91
  } else {
2904
56
    // No. We have to perform subregister inserts. For each insert, create an
2905
56
    // implicit def and a subregister insert, and save the register we create.
2906
174
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2907
118
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2908
118
      MachineInstr &ImpDefMI =
2909
118
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2910
118
                   ImpDefReg);
2911
118
2912
118
      // Now, create the subregister insert from SrcReg.
2913
118
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2914
118
      MachineInstr &InsMI =
2915
118
          *BuildMI(MBB, I, I.getDebugLoc(),
2916
118
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2917
118
               .addUse(ImpDefReg)
2918
118
               .addUse(SrcReg)
2919
118
               .addImm(AArch64::dsub);
2920
118
2921
118
      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2922
118
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2923
118
2924
118
      // Save the register so that we can copy from it after.
2925
118
      InsertRegs.push_back(InsertReg);
2926
118
    }
2927
56
  }
2928
147
2929
147
  // Now that we've created any necessary subregister inserts, we can
2930
147
  // create the copies.
2931
147
  //
2932
147
  // Perform the first copy separately as a subregister copy.
2933
147
  Register CopyTo = I.getOperand(0).getReg();
2934
147
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2935
147
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
2936
147
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2937
147
2938
147
  // Now, perform the remaining copies as vector lane copies.
2939
147
  unsigned LaneIdx = 1;
2940
447
  for (Register InsReg : InsertRegs) {
2941
447
    Register CopyTo = I.getOperand(LaneIdx).getReg();
2942
447
    MachineInstr &CopyInst =
2943
447
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2944
447
             .addUse(InsReg)
2945
447
             .addImm(LaneIdx);
2946
447
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2947
447
    ++LaneIdx;
2948
447
  }
2949
147
2950
147
  // Separately constrain the first copy's destination. Because of the
2951
147
  // limitation in constrainOperandRegClass, we can't guarantee that this will
2952
147
  // actually be constrained. So, do it ourselves using the second operand.
2953
147
  const TargetRegisterClass *RC =
2954
147
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
2955
147
  if (!RC) {
2956
0
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2957
0
    return false;
2958
0
  }
2959
147
2960
147
  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2961
147
  I.eraseFromParent();
2962
147
  return true;
2963
147
}
2964
2965
bool AArch64InstructionSelector::selectConcatVectors(
2966
2
    MachineInstr &I, MachineRegisterInfo &MRI) const {
2967
2
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2968
2
         "Unexpected opcode");
2969
2
  Register Dst = I.getOperand(0).getReg();
2970
2
  Register Op1 = I.getOperand(1).getReg();
2971
2
  Register Op2 = I.getOperand(2).getReg();
2972
2
  MachineIRBuilder MIRBuilder(I);
2973
2
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2974
2
  if (!ConcatMI)
2975
0
    return false;
2976
2
  I.eraseFromParent();
2977
2
  return true;
2978
2
}
2979
2980
void AArch64InstructionSelector::collectShuffleMaskIndices(
2981
    MachineInstr &I, MachineRegisterInfo &MRI,
2982
2.04k
    SmallVectorImpl<Optional<int>> &Idxs) const {
2983
2.04k
  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2984
2.04k
  assert(
2985
2.04k
      MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2986
2.04k
      "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2987
2.04k
  // Find the constant indices.
2988
6.27k
  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; 
++i4.22k
) {
2989
4.22k
    // Look through copies.
2990
4.22k
    MachineInstr *ScalarDef =
2991
4.22k
        getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
2992
4.22k
    assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2993
4.22k
    if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2994
143
      // This must be an undef if not a constant.
2995
143
      assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2996
143
      Idxs.push_back(None);
2997
4.08k
    } else {
2998
4.08k
      Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2999
4.08k
    }
3000
4.22k
  }
3001
2.04k
}
3002
3003
unsigned
3004
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3005
2.04k
                                                  MachineFunction &MF) const {
3006
2.04k
  Type *CPTy = CPVal->getType();
3007
2.04k
  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3008
2.04k
  if (Align == 0)
3009
0
    Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3010
2.04k
3011
2.04k
  MachineConstantPool *MCP = MF.getConstantPool();
3012
2.04k
  return MCP->getConstantPoolIndex(CPVal, Align);
3013
2.04k
}
3014
3015
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3016
2.04k
    Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3017
2.04k
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3018
2.04k
3019
2.04k
  auto Adrp =
3020
2.04k
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3021
2.04k
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3022
2.04k
3023
2.04k
  MachineInstr *LoadMI = nullptr;
3024
2.04k
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3025
2.04k
  case 16:
3026
175
    LoadMI =
3027
175
        &*MIRBuilder
3028
175
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3029
175
              .addConstantPoolIndex(CPIdx, 0,
3030
175
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3031
175
    break;
3032
2.04k
  case 8:
3033
1.86k
    LoadMI = &*MIRBuilder
3034
1.86k
                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3035
1.86k
                 .addConstantPoolIndex(
3036
1.86k
                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3037
1.86k
    break;
3038
2.04k
  default:
3039
0
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3040
0
                      << *CPVal->getType());
3041
0
    return nullptr;
3042
2.04k
  }
3043
2.04k
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3044
2.04k
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3045
2.04k
  return LoadMI;
3046
2.04k
}
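// A sketch of the ADRP + LDR addressing split used above, assuming 4 KiB
// pages: MO_PAGE resolves to the page of the constant-pool entry, MO_PAGEOFF
// to its offset within that page, and the two recombine to the full address:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x12345678ULL;    // a hypothetical constant-pool address
  uint64_t Page = Addr & ~0xFFFULL; // what ADRP materializes
  uint64_t Off = Addr & 0xFFFULL;   // folded into the LDR's immediate
  assert(Page + Off == Addr);
  return 0;
}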
3047
3048
/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3049
/// size and RB.
3050
static std::pair<unsigned, unsigned>
3051
38.2k
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3052
38.2k
  unsigned Opc, SubregIdx;
3053
38.2k
  if (RB.getID() == AArch64::GPRRegBankID) {
3054
24.8k
    if (EltSize == 32) {
3055
22.5k
      Opc = AArch64::INSvi32gpr;
3056
22.5k
      SubregIdx = AArch64::ssub;
3057
22.5k
    } else if (EltSize == 64) {
3058
2.32k
      Opc = AArch64::INSvi64gpr;
3059
2.32k
      SubregIdx = AArch64::dsub;
3060
2.32k
    } else {
3061
0
      llvm_unreachable("invalid elt size!");
3062
0
    }
3063
13.3k
  } else {
3064
13.3k
    if (EltSize == 8) {
3065
0
      Opc = AArch64::INSvi8lane;
3066
0
      SubregIdx = AArch64::bsub;
3067
13.3k
    } else if (EltSize == 16) {
3068
1.89k
      Opc = AArch64::INSvi16lane;
3069
1.89k
      SubregIdx = AArch64::hsub;
3070
11.4k
    } else if (EltSize == 32) {
3071
8.81k
      Opc = AArch64::INSvi32lane;
3072
8.81k
      SubregIdx = AArch64::ssub;
3073
8.81k
    } else if (EltSize == 64) {
3074
2.68k
      Opc = AArch64::INSvi64lane;
3075
2.68k
      SubregIdx = AArch64::dsub;
3076
2.68k
    } else {
3077
0
      llvm_unreachable("invalid elt size!");
3078
0
    }
3079
38.2k
  }
3080
38.2k
  return std::make_pair(Opc, SubregIdx);
3081
38.2k
}
3082
3083
MachineInstr *
3084
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3085
42
                                    MachineIRBuilder &MIRBuilder) const {
3086
42
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3087
42
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3088
42
  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3089
42
                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
3090
42
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3091
42
  auto ImmFns = selectArithImmed(RHS);
3092
42
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3093
42
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3094
42
3095
42
  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3096
42
3097
42
  // If we matched a valid constant immediate, add those operands.
3098
42
  if (ImmFns) {
3099
1
    for (auto &RenderFn : *ImmFns)
3100
2
      RenderFn(CmpMI);
3101
41
  } else {
3102
41
    CmpMI.addUse(RHS.getReg());
3103
41
  }
3104
42
3105
42
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3106
42
  return &*CmpMI;
3107
42
}
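// A standalone check (not LLVM code) of the fold emitCMN supports: for eq/ne,
// "icmp z, (0 - y)" agrees with the Z flag of z + y, which is what CMN (an
// ADDS that discards its result) computes. Other predicates can disagree,
// hence the EQ/NE restriction in tryFoldIntegerCompare further down:
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t Z = -3; Z <= 3; ++Z) {
    for (int64_t Y = -3; Y <= 3; ++Y) {
      bool ZFlag = ((uint64_t)Z + (uint64_t)Y) == 0; // Z flag of CMN z, y
      bool CmpEq = (uint64_t)Z == (0 - (uint64_t)Y); // icmp eq z, (0 - y)
      assert(ZFlag == CmpEq);
    }
  }
  return 0;
}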
3108
3109
MachineInstr *
3110
AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3111
7.10k
                                    MachineIRBuilder &MIRBuilder) const {
3112
7.10k
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3113
7.10k
  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3114
7.10k
  bool Is32Bit = (RegSize == 32);
3115
7.10k
  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3116
7.10k
                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
3117
7.10k
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3118
7.10k
3119
7.10k
  // We might be able to fold in an immediate into the TST. We need to make sure
3120
7.10k
  // it's a logical immediate though, since ANDS requires that.
3121
7.10k
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3122
7.10k
  bool IsImmForm = ValAndVReg.hasValue() &&
3123
7.10k
                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3124
7.10k
  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3125
7.10k
  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3126
7.10k
3127
7.10k
  if (IsImmForm)
3128
6.82k
    TstMI.addImm(
3129
6.82k
        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3130
286
  else
3131
286
    TstMI.addUse(RHS);
3132
7.10k
3133
7.10k
  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3134
7.10k
  return &*TstMI;
3135
7.10k
}
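// A sketch of what the TST above computes: ANDS sets flags on x & y and
// discards the result, so "icmp eq (and x, y), 0" reduces to the Z flag of
// TST x, y (the immediate form additionally requires the operand to be
// encodable as an AArch64 logical immediate, as checked above):
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0b1010, Y = 0b0101;
  bool ZFlag = ((X & Y) == 0); // Z flag of TST x, y
  assert(ZFlag);               // disjoint masks: the compare against 0 folds away
  return 0;
}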
3136
3137
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3138
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3139
435k
    MachineIRBuilder &MIRBuilder) const {
3140
435k
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3141
435k
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3142
435k
3143
435k
  // Fold the compare if possible.
3144
435k
  MachineInstr *FoldCmp =
3145
435k
      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3146
435k
  if (FoldCmp)
3147
7.14k
    return FoldCmp;
3148
428k
3149
428k
  // Can't fold into a CMN. Just emit a normal compare.
3150
428k
  unsigned CmpOpc = 0;
3151
428k
  Register ZReg;
3152
428k
3153
428k
  LLT CmpTy = MRI.getType(LHS.getReg());
3154
428k
  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3155
428k
         "Expected scalar or pointer");
3156
428k
  if (CmpTy == LLT::scalar(32)) {
3157
259k
    CmpOpc = AArch64::SUBSWrr;
3158
259k
    ZReg = AArch64::WZR;
3159
259k
  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3160
169k
    CmpOpc = AArch64::SUBSXrr;
3161
169k
    ZReg = AArch64::XZR;
3162
169k
  } else {
3163
0
    return nullptr;
3164
0
  }
3165
428k
3166
428k
  // Try to match immediate forms.
3167
428k
  auto ImmFns = selectArithImmed(RHS);
3168
428k
  if (ImmFns)
3169
201k
    CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3170
428k
3171
428k
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3172
428k
  // If we matched a valid constant immediate, add those operands.
3173
428k
  if (ImmFns) {
3174
201k
    for (auto &RenderFn : *ImmFns)
3175
403k
      RenderFn(CmpMI);
3176
226k
  } else {
3177
226k
    CmpMI.addUse(RHS.getReg());
3178
226k
  }
3179
428k
3180
428k
  // Make sure that we can constrain the compare that we emitted.
3181
428k
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3182
428k
  return &*CmpMI;
3183
428k
}
3184
3185
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3186
    Optional<Register> Dst, Register Op1, Register Op2,
3187
1.87k
    MachineIRBuilder &MIRBuilder) const {
3188
1.87k
  // We implement a vector concat by:
3189
1.87k
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3190
1.87k
  // 2. Insert the upper vector into the destination's upper element
3191
1.87k
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
3192
1.87k
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3193
1.87k
3194
1.87k
  const LLT Op1Ty = MRI.getType(Op1);
3195
1.87k
  const LLT Op2Ty = MRI.getType(Op2);
3196
1.87k
3197
1.87k
  if (Op1Ty != Op2Ty) {
3198
0
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3199
0
    return nullptr;
3200
0
  }
3201
1.87k
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3202
1.87k
3203
1.87k
  if (Op1Ty.getSizeInBits() >= 128) {
3204
0
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3205
0
    return nullptr;
3206
0
  }
3207
1.87k
3208
1.87k
  // At the moment we just support 64 bit vector concats.
3209
1.87k
  if (Op1Ty.getSizeInBits() != 64) {
3210
0
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3211
0
    return nullptr;
3212
0
  }
3213
1.87k
3214
1.87k
  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3215
1.87k
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3216
1.87k
  const TargetRegisterClass *DstRC =
3217
1.87k
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3218
1.87k
3219
1.87k
  MachineInstr *WidenedOp1 =
3220
1.87k
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3221
1.87k
  MachineInstr *WidenedOp2 =
3222
1.87k
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3223
1.87k
  if (!WidenedOp1 || !WidenedOp2) {
3224
0
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3225
0
    return nullptr;
3226
0
  }
3227
1.87k
3228
1.87k
  // Now do the insert of the upper element.
3229
1.87k
  unsigned InsertOpc, InsSubRegIdx;
3230
1.87k
  std::tie(InsertOpc, InsSubRegIdx) =
3231
1.87k
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3232
1.87k
3233
1.87k
  if (!Dst)
3234
1.86k
    Dst = MRI.createVirtualRegister(DstRC);
3235
1.87k
  auto InsElt =
3236
1.87k
      MIRBuilder
3237
1.87k
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3238
1.87k
          .addImm(1) /* Lane index */
3239
1.87k
          .addUse(WidenedOp2->getOperand(0).getReg())
3240
1.87k
          .addImm(0);
3241
1.87k
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3242
1.87k
  return &*InsElt;
3243
1.87k
}
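// A rough model (not LLVM code) of the 64-bit concat above: the first operand
// is widened into the low half of a 128-bit register, then the second operand
// is inserted as lane 1 of a 2 x 64-bit view of that register:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Op1 = 0x1111, Op2 = 0x2222; // two 64-bit vector operands
  uint64_t Q[2] = {Op1, 0};            // scalar_to_vector: Op1 in the low half
  Q[1] = Op2;                          // INSvi64lane with lane index 1
  assert(Q[0] == 0x1111 && Q[1] == 0x2222);
  return 0;
}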
3244
3245
MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3246
20.9k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3247
20.9k
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3248
20.9k
         "Expected a G_FCONSTANT!");
3249
20.9k
  MachineOperand &ImmOp = I.getOperand(1);
3250
20.9k
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3251
20.9k
3252
20.9k
  // Only handle 32 and 64 bit defs for now.
3253
20.9k
  if (DefSize != 32 && DefSize != 64)
3254
0
    return nullptr;
3255
20.9k
3256
20.9k
  // Don't handle null values using FMOV.
3257
20.9k
  if (ImmOp.getFPImm()->isNullValue())
3258
0
    return nullptr;
3259
20.9k
3260
20.9k
  // Get the immediate representation for the FMOV.
3261
20.9k
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3262
20.9k
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3263
20.9k
                          : AArch64_AM::getFP64Imm(ImmValAPF);
3264
20.9k
3265
20.9k
  // If this is -1, it means the immediate can't be represented as the requested
3266
20.9k
  // floating point value. Bail.
3267
20.9k
  if (Imm == -1)
3268
14.3k
    return nullptr;
3269
6.58k
3270
6.58k
  // Update MI to represent the new FMOV instruction, constrain it, and return.
3271
6.58k
  ImmOp.ChangeToImmediate(Imm);
3272
6.58k
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3273
6.58k
  I.setDesc(TII.get(MovOpc));
3274
6.58k
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3275
6.58k
  return &I;
3276
6.58k
}
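// A sketch of why getFP32Imm/getFP64Imm may return -1 above: FMOV's 8-bit
// immediate can only encode values of the form +/- n/16 * 2^e with n in
// [16, 31] and e in [-3, 4] (assumed here from the AArch64 FMOV immediate
// format), so most constants fall back to a different materialization:
#include <cassert>
#include <cmath>

static bool isFMOVEncodable(double D) {
  for (int N = 16; N <= 31; ++N)
    for (int E = -3; E <= 4; ++E) {
      double Val = (double)N / 16.0 * std::pow(2.0, E);
      if (D == Val || D == -Val)
        return true;
    }
  return false;
}

int main() {
  assert(isFMOVEncodable(2.0));  // 16/16 * 2^1
  assert(!isFMOVEncodable(0.1)); // no such representation: the FMOV path bails
  return 0;
}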
3277
3278
MachineInstr *
3279
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3280
141k
                                     MachineIRBuilder &MIRBuilder) const {
3281
141k
  // CSINC increments the result when the predicate is false. Invert it.
3282
141k
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3283
141k
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3284
141k
  auto I =
3285
141k
      MIRBuilder
3286
141k
          .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3287
141k
          .addImm(InvCC);
3288
141k
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3289
141k
  return &*I;
3290
141k
}
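// A standalone model of the CSINC trick above: CSINC wd, wzr, wzr, cond
// yields cond ? 0 : 1, so materializing a predicate P as 0/1 requires passing
// the *inverted* condition -- hence the getInversePredicate call:
#include <cassert>

static int csincZeroZero(bool Cond) { return Cond ? 0 : 0 + 1; }

int main() {
  bool P = true;                  // some predicate outcome
  assert(csincZeroZero(!P) == 1); // CSINC on the inverted condition -> 1
  P = false;
  assert(csincZeroZero(!P) == 0); // and -> 0 when P does not hold
  return 0;
}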
3291
3292
64.3k
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3293
64.3k
  MachineIRBuilder MIB(I);
3294
64.3k
  MachineRegisterInfo &MRI = *MIB.getMRI();
3295
64.3k
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3296
64.3k
3297
64.3k
  // We want to recognize this pattern:
3298
64.3k
  //
3299
64.3k
  // $z = G_FCMP pred, $x, $y
3300
64.3k
  // ...
3301
64.3k
  // $w = G_SELECT $z, $a, $b
3302
64.3k
  //
3303
64.3k
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3304
64.3k
  // some copies/truncs in between.)
3305
64.3k
  //
3306
64.3k
  // If we see this, then we can emit something like this:
3307
64.3k
  //
3308
64.3k
  // fcmp $x, $y
3309
64.3k
  // fcsel $w, $a, $b, pred
3310
64.3k
  //
3311
64.3k
  // Rather than emitting both of the rather long sequences in the standard
3312
64.3k
  // G_FCMP/G_SELECT select methods.
3313
64.3k
3314
64.3k
  // First, check if the condition is defined by a compare.
3315
64.3k
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3316
120k
  while (CondDef) {
3317
120k
    // We can only fold if all of the defs have one use.
3318
120k
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3319
11.1k
      return false;
3320
109k
3321
109k
    // We can skip over G_TRUNC since the condition is 1-bit.
3322
109k
    // Truncating/extending can have no impact on the value.
3323
109k
    unsigned Opc = CondDef->getOpcode();
3324
109k
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3325
53.0k
      break;
3326
56.4k
3327
56.4k
    // Can't see past copies from physregs.
3328
56.4k
    if (Opc == TargetOpcode::COPY &&
3329
56.4k
        TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3330
130
      return false;
3331
56.3k
3332
56.3k
    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3333
56.3k
  }
3334
64.3k
3335
64.3k
  // Is the condition defined by a compare?
3336
64.3k
  if (!CondDef)
3337
0
    return false;
3338
53.0k
3339
53.0k
  unsigned CondOpc = CondDef->getOpcode();
3340
53.0k
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3341
1.78k
    return false;
3342
51.2k
3343
51.2k
  AArch64CC::CondCode CondCode;
3344
51.2k
  if (CondOpc == TargetOpcode::G_ICMP) {
3345
49.6k
    CondCode = changeICMPPredToAArch64CC(
3346
49.6k
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3347
49.6k
    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3348
49.6k
                            CondDef->getOperand(1), MIB)) {
3349
0
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3350
0
      return false;
3351
0
    }
3352
1.61k
  } else {
3353
1.61k
    // Get the condition code for the select.
3354
1.61k
    AArch64CC::CondCode CondCode2;
3355
1.61k
    changeFCMPPredToAArch64CC(
3356
1.61k
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3357
1.61k
        CondCode2);
3358
1.61k
3359
1.61k
    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3360
1.61k
    // instructions to emit the comparison.
3361
1.61k
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3362
1.61k
    // unnecessary.
3363
1.61k
    if (CondCode2 != AArch64CC::AL)
3364
8
      return false;
3365
1.60k
3366
1.60k
    // Make sure we'll be able to select the compare.
3367
1.60k
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3368
1.60k
    if (!CmpOpc)
3369
0
      return false;
3370
1.60k
3371
1.60k
    // Emit a new compare.
3372
1.60k
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3373
1.60k
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3374
1.29k
      Cmp.addUse(CondDef->getOperand(3).getReg());
3375
1.60k
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3376
1.60k
  }
3377
51.2k
3378
51.2k
  // Emit the select.
3379
51.2k
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3380
51.2k
  auto CSel =
3381
51.2k
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3382
51.2k
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3383
51.2k
          .addImm(CondCode);
3384
51.2k
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3385
51.2k
  I.eraseFromParent();
3386
51.2k
  return true;
3387
51.2k
}
3388
3389
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3390
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3391
435k
    MachineIRBuilder &MIRBuilder) const {
3392
435k
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3393
435k
         "Unexpected MachineOperand");
3394
435k
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3395
435k
  // We want to find this sort of thing:
3396
435k
  // x = G_SUB 0, y
3397
435k
  // G_ICMP z, x
3398
435k
  //
3399
435k
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3400
435k
  // e.g:
3401
435k
  //
3402
435k
  // cmn z, y
3403
435k
3404
435k
  // Helper lambda to detect the subtract followed by the compare.
3405
435k
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3406
871k
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3407
871k
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3408
845k
      return false;
3409
26.2k
3410
26.2k
    // Need to make sure NZCV is the same at the end of the transformation.
3411
26.2k
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3412
22.8k
      return false;
3413
3.43k
3414
3.43k
    // We want to match against SUBs.
3415
3.43k
    if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3416
0
      return false;
3417
3.43k
3418
3.43k
    // Make sure that we're getting
3419
3.43k
    // x = G_SUB 0, y
3420
3.43k
    auto ValAndVReg =
3421
3.43k
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3422
3.43k
    if (!ValAndVReg || ValAndVReg->Value != 0)
3423
3.39k
      return false;
3424
42
3425
42
    // This can safely be represented as a CMN.
3426
42
    return true;
3427
42
  };
3428
435k
3429
435k
  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3430
435k
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3431
435k
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3432
435k
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3433
435k
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3434
435k
3435
435k
  // Given this:
3436
435k
  //
3437
435k
  // x = G_SUB 0, y
3438
435k
  // G_ICMP x, z
3439
435k
  //
3440
435k
  // Produce this:
3441
435k
  //
3442
435k
  // cmn y, z
3443
435k
  if (IsCMN(LHSDef, CC))
3444
9
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3445
435k
3446
435k
  // Same idea here, but with the RHS of the compare instead:
3447
435k
  //
3448
435k
  // Given this:
3449
435k
  //
3450
435k
  // x = G_SUB 0, y
3451
435k
  // G_ICMP z, x
3452
435k
  //
3453
435k
  // Produce this:
3454
435k
  //
3455
435k
  // cmn z, y
3456
435k
  if (IsCMN(RHSDef, CC))
3457
33
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3458
435k
3459
435k
  // Given this:
3460
435k
  //
3461
435k
  // z = G_AND x, y
3462
435k
  // G_ICMP z, 0
3463
435k
  //
3464
435k
  // Produce this if the compare is signed:
3465
435k
  //
3466
435k
  // tst x, y
3467
435k
  if (!isUnsignedICMPPred(P) && LHSDef &&
3468
435k
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
3469
15.0k
    // Make sure that the RHS is 0.
3470
15.0k
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3471
15.0k
    if (!ValAndVReg || ValAndVReg->Value != 0)
3472
7.99k
      return nullptr;
3473
7.10k
3474
7.10k
    return emitTST(LHSDef->getOperand(1).getReg(),
3475
7.10k
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
3476
7.10k
  }
3477
420k
3478
420k
  return nullptr;
3479
420k
}
3480
3481
3.60k
bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3482
3.60k
  // Try to match a vector splat operation into a dup instruction.
3483
3.60k
  // We're looking for this pattern:
3484
3.60k
  //    %scalar:gpr(s64) = COPY $x0
3485
3.60k
  //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3486
3.60k
  //    %cst0:gpr(s32) = G_CONSTANT i32 0
3487
3.60k
  //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3488
3.60k
  //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3489
3.60k
  //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3490
3.60k
  //                                             %zerovec(<2 x s32>)
3491
3.60k
  //
3492
3.60k
  // ...into:
3493
3.60k
  // %splat = DUP %scalar
3494
3.60k
  // We use the regbank of the scalar to determine which kind of dup to use.
3495
3.60k
  MachineIRBuilder MIB(I);
3496
3.60k
  MachineRegisterInfo &MRI = *MIB.getMRI();
3497
3.60k
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3498
3.60k
  using namespace TargetOpcode;
3499
3.60k
  using namespace MIPatternMatch;
3500
3.60k
3501
3.60k
  // Begin matching the insert.
3502
3.60k
  auto *InsMI =
3503
3.60k
      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3504
3.60k
  if (!InsMI)
3505
378
    return false;
3506
3.22k
  // Match the undef vector operand.
3507
3.22k
  auto *UndefMI =
3508
3.22k
      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3509
3.22k
  if (!UndefMI)
3510
1.65k
    return false;
3511
1.56k
  // Match the scalar being splatted.
3512
1.56k
  Register ScalarReg = InsMI->getOperand(2).getReg();
3513
1.56k
  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3514
1.56k
  // Match the index constant 0.
3515
1.56k
  int64_t Index = 0;
3516
1.56k
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3517
0
    return false;
3518
1.56k
3519
1.56k
  // The shuffle's second operand doesn't matter if the mask is all zero.
3520
1.56k
  auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
3521
1.56k
  if (!ZeroVec)
3522
0
    return false;
3523
1.56k
  int64_t Zero = 0;
3524
1.56k
  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3525
0
    return false;
3526
4.90k
  for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
3527
3.34k
    if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3528
2
      return false; // This wasn't an all zeros vector.
3529
3.34k
  }
3530
1.56k
3531
1.56k
  // We're done, now find out what kind of splat we need.
3532
1.56k
  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3533
1.56k
  LLT EltTy = VecTy.getElementType();
3534
1.56k
  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3535
0
    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3536
0
    return false;
3537
0
  }
3538
1.56k
  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3539
1.56k
  static const unsigned OpcTable[2][2] = {
3540
1.56k
      {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3541
1.56k
      {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3542
1.56k
  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3543
1.56k
3544
1.56k
  // For FP splats, we need to widen the scalar reg via undef too.
3545
1.56k
  if (IsFP) {
3546
1.01k
    MachineInstr *Widen = emitScalarToVector(
3547
1.01k
        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3548
1.01k
    if (!Widen)
3549
0
      return false;
3550
1.01k
    ScalarReg = Widen->getOperand(0).getReg();
3551
1.01k
  }
3552
1.56k
  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3553
1.56k
  if (IsFP)
3554
1.01k
    Dup.addImm(0);
3555
1.56k
  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3556
1.56k
  I.eraseFromParent();
3557
1.56k
  return true;
3558
1.56k
}
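// A model (not LLVM code) of the splat pattern tryOptVectorDup matches:
// insert the scalar into lane 0 of an undef vector, then shuffle with an
// all-zeros mask -- every result lane reads lane 0, i.e. a broadcast (DUP):
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Scalar = 42;
  uint64_t Ins[2] = {Scalar, 0}; // G_INSERT_VECTOR_ELT into undef, index 0
  int Mask[2] = {0, 0};          // the all-zeros shuffle mask
  uint64_t Splat[2];
  for (int i = 0; i < 2; ++i)
    Splat[i] = Ins[Mask[i]];     // G_SHUFFLE_VECTOR semantics
  assert(Splat[0] == 42 && Splat[1] == 42); // equivalent to DUP %scalar
  return 0;
}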
3559
3560
3.60k
bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3561
3.60k
  if (TM.getOptLevel() == CodeGenOpt::None)
3562
5
    return false;
3563
3.60k
  if (tryOptVectorDup(I))
3564
1.56k
    return true;
3565
2.03k
  return false;
3566
2.03k
}
3567
3568
bool AArch64InstructionSelector::selectShuffleVector(
3569
3.60k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3570
3.60k
  if (tryOptVectorShuffle(I))
3571
1.56k
    return true;
3572
2.04k
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3573
2.04k
  Register Src1Reg = I.getOperand(1).getReg();
3574
2.04k
  const LLT Src1Ty = MRI.getType(Src1Reg);
3575
2.04k
  Register Src2Reg = I.getOperand(2).getReg();
3576
2.04k
  const LLT Src2Ty = MRI.getType(Src2Reg);
3577
2.04k
3578
2.04k
  MachineBasicBlock &MBB = *I.getParent();
3579
2.04k
  MachineFunction &MF = *MBB.getParent();
3580
2.04k
  LLVMContext &Ctx = MF.getFunction().getContext();
3581
2.04k
3582
2.04k
  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3583
2.04k
  // operand, it comes in as a normal vector value which we have to analyze to
3584
2.04k
  // find the mask indices. If the mask element is undef, then
3585
2.04k
  // collectShuffleMaskIndices() will add a None entry for that index into
3586
2.04k
  // the list.
3587
2.04k
  SmallVector<Optional<int>, 8> Mask;
3588
2.04k
  collectShuffleMaskIndices(I, MRI, Mask);
3589
2.04k
  assert(!Mask.empty() && "Expected to find mask indices");
3590
2.04k
3591
2.04k
  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3592
2.04k
  // it originated from a <1 x T> type. Those should have been lowered into
3593
2.04k
  // G_BUILD_VECTOR earlier.
3594
2.04k
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3595
0
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3596
0
    return false;
3597
0
  }
3598
2.04k
3599
2.04k
  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3600
2.04k
3601
2.04k
  SmallVector<Constant *, 64> CstIdxs;
3602
4.22k
  for (auto &MaybeVal : Mask) {
3603
4.22k
    // For now, we just assume any undef indexes are 0. This should be
3604
4.22k
    // optimized in future, e.g. to select DUP etc.
3605
4.22k
    int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
3606
21.9k
    for (unsigned Byte = 0; Byte < BytesPerElt; 
++Byte17.7k
) {
3607
17.7k
      unsigned Offset = Byte + Val * BytesPerElt;
3608
17.7k
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3609
17.7k
    }
3610
4.22k
  }
3611
2.04k
3612
2.04k
  MachineIRBuilder MIRBuilder(I);
3613
2.04k
3614
2.04k
  // Use a constant pool to load the index vector for TBL.
3615
2.04k
  Constant *CPVal = ConstantVector::get(CstIdxs);
3616
2.04k
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3617
2.04k
  if (!IndexLoad) {
3618
0
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3619
0
    return false;
3620
0
  }
3621
2.04k
3622
2.04k
  if (DstTy.getSizeInBits() != 128) {
3623
1.86k
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3624
1.86k
    // This case can be done with TBL1.
3625
1.86k
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3626
1.86k
    if (!Concat) {
3627
0
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3628
0
      return false;
3629
0
    }
3630
1.86k
3631
1.86k
    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3632
1.86k
    IndexLoad =
3633
1.86k
        emitScalarToVector(64, &AArch64::FPR128RegClass,
3634
1.86k
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);
3635
1.86k
3636
1.86k
    auto TBL1 = MIRBuilder.buildInstr(
3637
1.86k
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3638
1.86k
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3639
1.86k
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3640
1.86k
3641
1.86k
    auto Copy =
3642
1.86k
        MIRBuilder
3643
1.86k
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3644
1.86k
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3645
1.86k
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3646
1.86k
    I.eraseFromParent();
3647
1.86k
    return true;
3648
1.86k
  }
3649
175
3650
175
  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3651
175
  // Q registers for regalloc.
3652
175
  auto RegSeq = MIRBuilder
3653
175
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
3654
175
                                {&AArch64::QQRegClass}, {Src1Reg})
3655
175
                    .addImm(AArch64::qsub0)
3656
175
                    .addUse(Src2Reg)
3657
175
                    .addImm(AArch64::qsub1);
3658
175
3659
175
  auto TBL2 =
3660
175
      MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3661
175
                            {RegSeq, IndexLoad->getOperand(0).getReg()});
3662
175
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3663
175
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3664
175
  I.eraseFromParent();
3665
175
  return true;
3666
175
}
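// A standalone sketch of the mask expansion above: TBL indexes bytes, so each
// element-level shuffle index Val becomes BytesPerElt consecutive byte
// indices Byte + Val * BytesPerElt:
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned BytesPerElt = 4; // e.g. 32-bit elements
  const int Mask[2] = {1, 3};     // element-level shuffle indices
  std::vector<uint8_t> ByteIdxs;
  for (int Val : Mask)
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      ByteIdxs.push_back(Byte + Val * BytesPerElt);
  // Element 1 expands to bytes 4..7, element 3 to bytes 12..15.
  assert(ByteIdxs.front() == 4 && ByteIdxs.back() == 15);
  return 0;
}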
3667
3668
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3669
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
3670
    unsigned LaneIdx, const RegisterBank &RB,
3671
36.3k
    MachineIRBuilder &MIRBuilder) const {
3672
36.3k
  MachineInstr *InsElt = nullptr;
3673
36.3k
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3674
36.3k
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3675
36.3k
3676
36.3k
  // Create a register to define with the insert if one wasn't passed in.
3677
36.3k
  if (!DstReg)
3678
36.3k
    DstReg = MRI.createVirtualRegister(DstRC);
3679
36.3k
3680
36.3k
  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3681
36.3k
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3682
36.3k
3683
36.3k
  if (RB.getID() == AArch64::FPRRegBankID) {
3684
11.5k
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3685
11.5k
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3686
11.5k
                 .addImm(LaneIdx)
3687
11.5k
                 .addUse(InsSub->getOperand(0).getReg())
3688
11.5k
                 .addImm(0);
3689
24.8k
  } else {
3690
24.8k
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3691
24.8k
                 .addImm(LaneIdx)
3692
24.8k
                 .addUse(EltReg);
3693
24.8k
  }
3694
36.3k
3695
36.3k
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3696
36.3k
  return InsElt;
3697
36.3k
}
3698
3699
bool AArch64InstructionSelector::selectInsertElt(
3700
2.89k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3701
2.89k
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3702
2.89k
3703
2.89k
  // Get information on the destination.
3704
2.89k
  Register DstReg = I.getOperand(0).getReg();
3705
2.89k
  const LLT DstTy = MRI.getType(DstReg);
3706
2.89k
  unsigned VecSize = DstTy.getSizeInBits();
3707
2.89k
3708
2.89k
  // Get information on the element we want to insert into the destination.
3709
2.89k
  Register EltReg = I.getOperand(2).getReg();
3710
2.89k
  const LLT EltTy = MRI.getType(EltReg);
3711
2.89k
  unsigned EltSize = EltTy.getSizeInBits();
3712
2.89k
  if (EltSize < 16 || EltSize > 64)
3713
0
    return false; // Don't support all element types yet.
3714
2.89k
3715
2.89k
  // Find the definition of the index. Bail out if it's not defined by a
3716
2.89k
  // G_CONSTANT.
3717
2.89k
  Register IdxReg = I.getOperand(3).getReg();
3718
2.89k
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3719
2.89k
  if (!VRegAndVal)
3720
0
    return false;
3721
2.89k
  unsigned LaneIdx = VRegAndVal->Value;
3722
2.89k
3723
2.89k
  // Perform the lane insert.
3724
2.89k
  Register SrcReg = I.getOperand(1).getReg();
3725
2.89k
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3726
2.89k
  MachineIRBuilder MIRBuilder(I);
3727
2.89k
3728
2.89k
  if (VecSize < 128) {
3729
2.74k
    // If the vector we're inserting into is smaller than 128 bits, widen it
3730
2.74k
    // to 128 to do the insert.
3731
2.74k
    MachineInstr *ScalarToVec = emitScalarToVector(
3732
2.74k
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3733
2.74k
    if (!ScalarToVec)
3734
0
      return false;
3735
2.74k
    SrcReg = ScalarToVec->getOperand(0).getReg();
3736
2.74k
  }
3737
2.89k
3738
2.89k
  // Create an insert into a new FPR128 register.
3739
2.89k
  // Note that if our vector is already 128 bits, we end up emitting an extra
3740
2.89k
  // register.
3741
2.89k
  MachineInstr *InsMI =
3742
2.89k
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3743
2.89k
3744
2.89k
  if (VecSize < 128) {
3745
2.74k
    // If we had to widen to perform the insert, then we have to demote back to
3746
2.74k
    // the original size to get the result we want.
3747
2.74k
    Register DemoteVec = InsMI->getOperand(0).getReg();
3748
2.74k
    const TargetRegisterClass *RC =
3749
2.74k
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3750
2.74k
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3751
0
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3752
0
      return false;
3753
0
    }
3754
2.74k
    unsigned SubReg = 0;
3755
2.74k
    if (!getSubRegForClass(RC, TRI, SubReg))
3756
0
      return false;
3757
2.74k
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3758
0
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3759
0
                        << ")\n");
3760
0
      return false;
3761
0
    }
3762
2.74k
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3763
2.74k
        .addReg(DemoteVec, 0, SubReg);
3764
2.74k
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
3765
2.74k
  } else {
3766
143
    // No widening needed.
3767
143
    InsMI->getOperand(0).setReg(DstReg);
3768
143
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3769
143
  }
3770
2.89k
3771
2.89k
  I.eraseFromParent();
3772
2.89k
  return true;
3773
2.89k
}
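
The demote step above hinges on one fact: after the widened insert, the original narrow vector lives in the low 32-bit (ssub) or 64-bit (dsub) lanes of the FPR128 result. Below is a minimal standalone sketch of that subregister choice; demoteSubRegForVecSize is an illustrative helper, not an LLVM API.

#include <cassert>
#include <cstdio>

// Illustrative only: which FPR128 subregister covers the original
// vector width after the widen-insert-demote sequence in
// selectInsertElt. 128-bit vectors skip the demote entirely.
static const char *demoteSubRegForVecSize(unsigned VecSizeInBits) {
  switch (VecSizeInBits) {
  case 32:
    return "ssub"; // e.g. a <2 x s16> destination
  case 64:
    return "dsub"; // e.g. a <2 x s32> or <4 x s16> destination
  default:
    return nullptr; // no subregister copy needed (or unsupported)
  }
}

int main() {
  assert(demoteSubRegForVecSize(128) == nullptr);
  std::printf("<2 x s32> demotes via %s\n", demoteSubRegForVecSize(64));
  return 0;
}
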
3774
3775
bool AArch64InstructionSelector::selectBuildVector(
3776
18.4k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3777
18.4k
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3778
18.4k
  // Until we port more of the optimized selections, just use a vector
3779
18.4k
  // insert sequence.
3780
18.4k
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3781
18.4k
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3782
18.4k
  unsigned EltSize = EltTy.getSizeInBits();
3783
18.4k
  if (EltSize < 16 || EltSize > 64)
3784
0
    return false; // Don't support all element types yet.
3785
18.4k
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3786
18.4k
  MachineIRBuilder MIRBuilder(I);
3787
18.4k
3788
18.4k
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3789
18.4k
  MachineInstr *ScalarToVec =
3790
18.4k
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3791
18.4k
                         I.getOperand(1).getReg(), MIRBuilder);
3792
18.4k
  if (!ScalarToVec)
3793
0
    return false;
3794
18.4k
3795
18.4k
  Register DstVec = ScalarToVec->getOperand(0).getReg();
3796
18.4k
  unsigned DstSize = DstTy.getSizeInBits();
3797
18.4k
3798
18.4k
  // Keep track of the last MI we inserted. Later on, we might be able to save
3799
18.4k
  // a copy using it.
3800
18.4k
  MachineInstr *PrevMI = nullptr;
3801
51.9k
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3802
33.5k
    // Note that if we don't do a subregister copy, we can end up making an
3803
33.5k
    // extra register.
3804
33.5k
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3805
33.5k
                              MIRBuilder);
3806
33.5k
    DstVec = PrevMI->getOperand(0).getReg();
3807
33.5k
  }
3808
18.4k
3809
18.4k
  // If DstTy's size in bits is less than 128, then emit a subregister copy
3810
18.4k
  // from DstVec to the last register we've defined.
3811
18.4k
  if (DstSize < 128) {
3812
8.48k
    // Force this to be FPR using the destination vector.
3813
8.48k
    const TargetRegisterClass *RC =
3814
8.48k
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3815
8.48k
    if (!RC)
3816
0
      return false;
3817
8.48k
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3818
0
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3819
0
      return false;
3820
0
    }
3821
8.48k
3822
8.48k
    unsigned SubReg = 0;
3823
8.48k
    if (!getSubRegForClass(RC, TRI, SubReg))
3824
0
      return false;
3825
8.48k
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3826
0
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3827
0
                        << ")\n");
3828
0
      return false;
3829
0
    }
3830
8.48k
3831
8.48k
    Register Reg = MRI.createVirtualRegister(RC);
3832
8.48k
    Register DstReg = I.getOperand(0).getReg();
3833
8.48k
3834
8.48k
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3835
8.48k
        .addReg(DstVec, 0, SubReg);
3836
8.48k
    MachineOperand &RegOp = I.getOperand(1);
3837
8.48k
    RegOp.setReg(Reg);
3838
8.48k
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
3839
9.98k
  } else {
3840
9.98k
    // We don't need a subregister copy. Save a copy by re-using the
3841
9.98k
    // destination register on the final insert.
3842
9.98k
    assert(PrevMI && "PrevMI was null?");
3843
9.98k
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3844
9.98k
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3845
9.98k
  }
3846
18.4k
3847
18.4k
  I.eraseFromParent();
3848
18.4k
  return true;
3849
18.4k
}
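
The loop bounds above are easy to misread: operand 1 is already consumed by emitScalarToVector, so the lane inserts walk operands 2 through DstSize/EltSize and write lanes 1 through NumElts-1. A minimal standalone sketch of that index math follows; buildVectorLanes is an illustrative helper, not an LLVM API.

#include <cassert>
#include <vector>

// Illustrative only: the operand-to-lane mapping used by
// selectBuildVector. Operand i lands in lane i - 1; operand 1 became
// lane 0 when the scalar was promoted to a vector.
static std::vector<unsigned> buildVectorLanes(unsigned DstSize,
                                              unsigned EltSize) {
  std::vector<unsigned> Lanes;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i)
    Lanes.push_back(i - 1);
  return Lanes;
}

int main() {
  // A <4 x s32> G_BUILD_VECTOR needs three lane inserts, lanes 1..3.
  assert((buildVectorLanes(128, 32) == std::vector<unsigned>{1, 2, 3}));
  return 0;
}
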
3850
3851
/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3852
/// ID if it exists, and 0 otherwise.
3853
9.59k
static unsigned findIntrinsicID(MachineInstr &I) {
3854
19.2k
  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3855
19.2k
    return Op.isIntrinsicID();
3856
19.2k
  });
3857
9.59k
  if (IntrinOp == I.operands_end())
3858
0
    return 0;
3859
9.59k
  return IntrinOp->getIntrinsicID();
3860
9.59k
}
3861
3862
/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3863
/// intrinsic.
3864
4.20k
static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3865
4.20k
  switch (NumBytesToStore) {
3866
4.20k
  // TODO: 1, 2, and 4 byte stores.
3867
4.20k
  case 8:
3868
4.20k
    return AArch64::STLXRX;
3869
4.20k
  default:
3870
6
    LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3871
6
                      << NumBytesToStore << ")\n");
3872
6
    break;
3873
6
  }
3874
6
  return 0;
3875
6
}
3876
3877
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3878
8.93k
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3879
8.93k
  // Find the intrinsic ID.
3880
8.93k
  unsigned IntrinID = findIntrinsicID(I);
3881
8.93k
  if (!IntrinID)
3882
0
    return false;
3883
8.93k
  MachineIRBuilder MIRBuilder(I);
3884
8.93k
3885
8.93k
  // Select the instruction.
3886
8.93k
  switch (IntrinID) {
3887
8.93k
  default:
3888
4.60k
    return false;
3889
8.93k
  case Intrinsic::trap:
3890
123
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3891
123
    break;
3892
8.93k
  case Intrinsic::debugtrap:
3893
1
    if (!STI.isTargetWindows())
3894
0
      return false;
3895
1
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3896
1
    break;
3897
4.20k
  case Intrinsic::aarch64_stlxr:
3898
4.20k
    Register StatReg = I.getOperand(0).getReg();
3899
4.20k
    assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3900
4.20k
           "Status register must be 32 bits!");
3901
4.20k
    Register SrcReg = I.getOperand(2).getReg();
3902
4.20k
3903
4.20k
    if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3904
0
      LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3905
0
      return false;
3906
0
    }
3907
4.20k
3908
4.20k
    Register PtrReg = I.getOperand(3).getReg();
3909
4.20k
    assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3910
4.20k
3911
4.20k
    // Expect only one memory operand.
3912
4.20k
    if (!I.hasOneMemOperand())
3913
0
      return false;
3914
4.20k
3915
4.20k
    const MachineMemOperand *MemOp = *I.memoperands_begin();
3916
4.20k
    unsigned NumBytesToStore = MemOp->getSize();
3917
4.20k
    unsigned Opc = getStlxrOpcode(NumBytesToStore);
3918
4.20k
    if (!Opc)
3919
6
      return false;
3920
4.20k
3921
4.20k
    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3922
4.20k
    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3923
8.93k
  }
3924
8.93k
3925
8.93k
  I.eraseFromParent();
3926
4.32k
  return true;
3927
8.93k
}
3928
3929
bool AArch64InstructionSelector::selectIntrinsic(
3930
655
    MachineInstr &I, MachineRegisterInfo &MRI) const {
3931
655
  unsigned IntrinID = findIntrinsicID(I);
3932
655
  if (!IntrinID)
3933
0
    return false;
3934
655
  MachineIRBuilder MIRBuilder(I);
3935
655
3936
655
  switch (IntrinID) {
3937
655
  default:
3938
654
    break;
3939
655
  case Intrinsic::aarch64_crypto_sha1h:
3940
1
    Register DstReg = I.getOperand(0).getReg();
3941
1
    Register SrcReg = I.getOperand(2).getReg();
3942
1
3943
1
    // FIXME: Should this be an assert?
3944
1
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3945
1
        MRI.getType(SrcReg).getSizeInBits() != 32)
3946
0
      return false;
3947
1
3948
1
    // The operation has to happen on FPRs. Set up some new FPR registers for
3949
1
    // the source and destination if they are on GPRs.
3950
1
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3951
1
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3952
1
      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3953
1
3954
1
      // Make sure the copy ends up getting constrained properly.
3955
1
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3956
1
                                   AArch64::GPR32RegClass, MRI);
3957
1
    }
3958
1
3959
1
    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3960
1
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3961
1
3962
1
    // Actually insert the instruction.
3963
1
    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3964
1
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3965
1
3966
1
    // Did we create a new register for the destination?
3967
1
    if (DstReg != I.getOperand(0).getReg()) {
3968
1
      // Yep. Copy the result of the instruction back into the original
3969
1
      // destination.
3970
1
      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3971
1
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3972
1
                                   AArch64::GPR32RegClass, MRI);
3973
1
    }
3974
1
3975
1
    I.eraseFromParent();
3976
1
    return true;
3977
654
  }
3978
654
  return false;
3979
654
}
3980
3981
1.07M
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
3982
1.07M
  auto &MI = *Root.getParent();
3983
1.07M
  auto &MBB = *MI.getParent();
3984
1.07M
  auto &MF = *MBB.getParent();
3985
1.07M
  auto &MRI = MF.getRegInfo();
3986
1.07M
  uint64_t Immed;
3987
1.07M
  if (Root.isImm())
3988
0
    Immed = Root.getImm();
3989
1.07M
  else if (Root.isCImm())
3990
0
    Immed = Root.getCImm()->getZExtValue();
3991
1.07M
  else if (Root.isReg()) {
3992
1.07M
    auto ValAndVReg =
3993
1.07M
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
3994
1.07M
    if (!ValAndVReg)
3995
584k
      return None;
3996
491k
    Immed = ValAndVReg->Value;
3997
491k
  } else
3998
0
    return None;
3999
491k
  return Immed;
4000
491k
}
4001
4002
InstructionSelector::ComplexRendererFns
4003
24.6k
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4004
24.6k
  auto MaybeImmed = getImmedFromMO(Root);
4005
24.6k
  if (MaybeImmed == None || *MaybeImmed > 31)
4006
0
    return None;
4007
24.6k
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4008
24.6k
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4009
24.6k
}
4010
4011
InstructionSelector::ComplexRendererFns
4012
24.6k
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4013
24.6k
  auto MaybeImmed = getImmedFromMO(Root);
4014
24.6k
  if (MaybeImmed == None || *MaybeImmed > 31)
4015
0
    return None;
4016
24.6k
  uint64_t Enc = 31 - *MaybeImmed;
4017
24.6k
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4018
24.6k
}
4019
4020
InstructionSelector::ComplexRendererFns
4021
18.9k
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4022
18.9k
  auto MaybeImmed = getImmedFromMO(Root);
4023
18.9k
  if (MaybeImmed == None || *MaybeImmed > 63)
4024
0
    return None;
4025
18.9k
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4026
18.9k
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4027
18.9k
}
4028
4029
InstructionSelector::ComplexRendererFns
4030
18.9k
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4031
18.9k
  auto MaybeImmed = getImmedFromMO(Root);
4032
18.9k
  if (MaybeImmed == None || *MaybeImmed > 63)
4033
0
    return None;
4034
18.9k
  uint64_t Enc = 63 - *MaybeImmed;
4035
18.9k
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4036
18.9k
}
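
All four renderers compute the immr/imms fields for the UBFM/SBFM aliases that implement constant shifts: for a left shift by s on an N-bit register, immr is (N - s) mod N and imms is N - 1 - s. A standalone sketch with one worked value; encA32 and encB32 are illustrative names, not the LLVM renderers themselves.

#include <cassert>
#include <cstdint>

// Illustrative only: the two immediates rendered by selectShiftA_32 and
// selectShiftB_32 for a constant left shift. "lsl wD, wN, #s" is the
// alias of "ubfm wD, wN, #((32 - s) & 31), #(31 - s)".
static uint64_t encA32(uint64_t Shift) { return (32 - Shift) & 0x1f; }
static uint64_t encB32(uint64_t Shift) { return 31 - Shift; }

int main() {
  // lsl w0, w1, #4 encodes as ubfm w0, w1, #28, #27.
  assert(encA32(4) == 28 && encB32(4) == 27);
  return 0;
}
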
4037
4038
/// SelectArithImmed - Select an immediate value that can be represented as
4039
/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4040
/// Val set to the 12-bit value and Shift set to the shifter operand.
4041
InstructionSelector::ComplexRendererFns
4042
988k
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4043
988k
  // This function is called from the addsub_shifted_imm ComplexPattern,
4044
988k
  // which lists [imm] as the list of opcodes it's interested in; however,
4045
988k
  // we still need to check whether the operand is actually an immediate
4046
988k
  // here because the ComplexPattern opcode list is only used in
4047
988k
  // root-level opcode matching.
4048
988k
  auto MaybeImmed = getImmedFromMO(Root);
4049
988k
  if (MaybeImmed == None)
4050
584k
    return None;
4051
404k
  uint64_t Immed = *MaybeImmed;
4052
404k
  unsigned ShiftAmt;
4053
404k
4054
404k
  if (Immed >> 12 == 0) {
4055
312k
    ShiftAmt = 0;
4056
312k
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4057
1.44k
    ShiftAmt = 12;
4058
1.44k
    Immed = Immed >> 12;
4059
1.44k
  } else
4060
90.4k
    return None;
4061
313k
4062
313k
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4063
313k
  return {{
4064
313k
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4065
313k
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4066
313k
  }};
4067
313k
}
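
In other words, an arithmetic immediate is legal exactly when it fits in 12 bits, either directly or after dropping 12 low zero bits. A standalone sketch of the same split with a worked value; encodeArithImmed is an illustrative helper, not the renderer itself.

#include <cassert>
#include <cstdint>
#include <utility>

// Illustrative only: the addsub_shifted_imm test performed by
// selectArithImmed. A legal immediate is a uimm12, optionally shifted
// left by 12. On success, Out holds {encoded value, shift amount}.
static bool encodeArithImmed(uint64_t Imm,
                             std::pair<uint64_t, unsigned> &Out) {
  if (Imm >> 12 == 0) {
    Out = {Imm, 0};
    return true;
  }
  if ((Imm & 0xfff) == 0 && Imm >> 24 == 0) {
    Out = {Imm >> 12, 12};
    return true;
  }
  return false; // needs a materialized constant and a register form
}

int main() {
  std::pair<uint64_t, unsigned> E;
  assert(encodeArithImmed(0x5000, E) && E.first == 5 && E.second == 12);
  assert(!encodeArithImmed(0x1001000, E)); // bit 24 set: out of range
  return 0;
}
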
4068
4069
/// Return true if it is worth folding MI into an extended register. That is,
4070
/// if it's safe to pull it into the addressing mode of a load or store as a
4071
/// shift.
4072
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4073
48.8k
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4074
48.8k
  // Always fold if there is one use, or if we're optimizing for size.
4075
48.8k
  Register DefReg = MI.getOperand(0).getReg();
4076
48.8k
  if (MRI.hasOneUse(DefReg) ||
4077
48.8k
      MI.getParent()->getParent()->getFunction().hasMinSize())
4078
35.0k
    return true;
4079
13.8k
4080
13.8k
  // It's better to avoid folding and recomputing shifts when we don't have a
4081
13.8k
  // fastpath.
4082
13.8k
  if (!STI.hasLSLFast())
4083
13.8k
    return false;
4084
1
4085
1
  // We have a fastpath, so folding a shift in and potentially computing it
4086
1
  // many times may be beneficial. Check if this is only used in memory ops.
4087
1
  // If it is, then we should fold.
4088
1
  return all_of(MRI.use_instructions(DefReg),
4089
2
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4090
1
}
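
The heuristic above reduces to three questions once the register-info and subtarget queries are evaluated. A standalone sketch; worthFoldingShift and its boolean parameters are illustrative abstractions of those queries.

#include <cassert>

// Illustrative only: the decision made by isWorthFoldingIntoExtendedReg,
// with the MRI and subtarget queries folded into flags.
static bool worthFoldingShift(bool HasOneUse, bool OptForMinSize,
                              bool HasLSLFast, bool AllUsesAreMemOps) {
  // A single use means folding can't duplicate work, and at minsize we
  // want the smaller encoding regardless.
  if (HasOneUse || OptForMinSize)
    return true;
  // Without a fast shifted addressing mode, recomputing the shift in
  // several loads/stores is a loss.
  if (!HasLSLFast)
    return false;
  // With LSLFast, fold as long as every user can absorb the shift.
  return AllUsesAreMemOps;
}

int main() {
  assert(worthFoldingShift(true, false, false, false));
  assert(!worthFoldingShift(false, false, false, true));
  return 0;
}
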
4091
4092
/// This is used for computing addresses like this:
4093
///
4094
/// ldr x1, [x2, x3, lsl #3]
4095
///
4096
/// Where x2 is the base register, and x3 is an offset register. The shift-left
4097
/// is a constant value specific to this load instruction. That is, we'll never
4098
/// see anything other than a 3 here (the log2 of the size of the
4100
/// element being loaded).
4100
InstructionSelector::ComplexRendererFns
4101
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4102
262k
    MachineOperand &Root, unsigned SizeInBytes) const {
4103
262k
  if (!Root.isReg())
4104
0
    return None;
4105
262k
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4106
262k
4107
262k
  // Make sure that the memory op is a valid size.
4108
262k
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
4109
262k
  if (LegalShiftVal == 0)
4110
0
    return None;
4111
262k
4112
262k
  // We want to find something like this:
4113
262k
  //
4114
262k
  // val = G_CONSTANT LegalShiftVal
4115
262k
  // shift = G_SHL off_reg val
4116
262k
  // ptr = G_GEP base_reg shift
4117
262k
  // x = G_LOAD ptr
4118
262k
  //
4119
262k
  // And fold it into this addressing mode:
4120
262k
  //
4121
262k
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4122
262k
4123
262k
  // Check if we can find the G_GEP.
4124
262k
  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4125
262k
  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4126
228k
    return None;
4127
34.7k
4128
34.7k
  // Now try to match the G_SHL.
4129
34.7k
  MachineInstr *Shl =
4130
34.7k
      getOpcodeDef(TargetOpcode::G_SHL, Gep->getOperand(2).getReg(), MRI);
4131
34.7k
  if (!Shl || !isWorthFoldingIntoExtendedReg(*Shl, MRI))
4132
34.5k
    return None;
4133
227
4134
227
  // Now, try to find the specific G_CONSTANT.
4135
227
  auto ValAndVReg =
4136
227
      getConstantVRegValWithLookThrough(Shl->getOperand(2).getReg(), MRI);
4137
227
  if (!ValAndVReg)
4138
0
    return None;
4139
227
4140
227
  // The value must fit into 3 bits, and must be positive. Make sure that is
4141
227
  // true.
4142
227
  int64_t ImmVal = ValAndVReg->Value;
4143
227
  if ((ImmVal & 0x7) != ImmVal)
4144
0
    return None;
4145
227
4146
227
  // We are only allowed to shift by LegalShiftVal. This shift value is built
4147
227
  // into the instruction, so we can't just use whatever we want.
4148
227
  if (ImmVal != LegalShiftVal)
4149
1
    return None;
4150
226
4151
226
  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4152
226
  // offset. Signify that we are shifting by setting the shift flag to 1.
4153
226
  return {{
4154
226
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4155
226
      [=](MachineInstrBuilder &MIB) { MIB.add(Shl->getOperand(1)); },
4156
226
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4157
226
      [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4158
226
  }};
4159
226
}
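
The only shift this fold can accept is log2 of the access size, because the scale is hard-wired into the load encoding. A standalone sketch of the legality test; isLegalAddrShift is an illustrative helper, not an LLVM API.

#include <cassert>
#include <cstdint>

// Illustrative only: the shift checks in selectAddrModeShiftedExtendXReg.
// "ldr x, [base, off, lsl #k]" is only formed when k == log2(SizeInBytes),
// and 1-byte accesses (k == 0) never take this path.
static bool isLegalAddrShift(int64_t ShiftImm, unsigned SizeInBytes) {
  int64_t LegalShiftVal = 0;
  for (unsigned S = SizeInBytes; S > 1; S >>= 1)
    ++LegalShiftVal; // Log2_32(SizeInBytes)
  if (LegalShiftVal == 0)
    return false; // byte-sized ops don't use the shifted form
  if ((ShiftImm & 0x7) != ShiftImm)
    return false; // must be a positive value that fits in 3 bits
  return ShiftImm == LegalShiftVal;
}

int main() {
  assert(isLegalAddrShift(3, 8));  // ldr x0, [x1, x2, lsl #3]
  assert(!isLegalAddrShift(2, 8)); // wrong scale for an 8-byte access
  return 0;
}
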
4160
4161
/// This is used for computing addresses like this:
4162
///
4163
/// ldr x1, [x2, x3]
4164
///
4165
/// Where x2 is the base register, and x3 is an offset register.
4166
///
4167
/// When possible (or profitable) to fold a G_GEP into the address calculation,
4168
/// this will do so. Otherwise, it will return None.
4169
InstructionSelector::ComplexRendererFns
4170
AArch64InstructionSelector::selectAddrModeRegisterOffset(
4171
262k
    MachineOperand &Root) const {
4172
262k
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4173
262k
4174
262k
  // We need a GEP.
4175
262k
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4176
262k
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4177
214k
    return None;
4178
48.2k
4179
48.2k
  // If this is used more than once, let's not bother folding.
4180
48.2k
  // TODO: Check if they are memory ops. If they are, then we can still fold
4181
48.2k
  // without having to recompute anything.
4182
48.2k
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4183
13.6k
    return None;
4184
34.5k
4185
34.5k
  // Base is the GEP's LHS, offset is its RHS.
4186
34.5k
  return {{
4187
34.5k
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4188
34.5k
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4189
34.5k
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4190
34.5k
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4191
34.5k
  }};
4192
34.5k
}
4193
4194
/// This is intended to be equivalent to selectAddrModeXRO in
4195
/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
4196
InstructionSelector::ComplexRendererFns
4197
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4198
592k
                                              unsigned SizeInBytes) const {
4199
592k
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4200
592k
4201
592k
  // If we have a constant offset, then we probably don't want to match a
4202
592k
  // register offset.
4203
592k
  if (isBaseWithConstantOffset(Root, MRI))
4204
329k
    return None;
4205
262k
4206
262k
  // Try to fold shifts into the addressing mode.
4207
262k
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4208
262k
  if (AddrModeFns)
4209
226
    return AddrModeFns;
4210
262k
4211
262k
  // If that doesn't work, see if it's possible to fold in registers from
4212
262k
  // a GEP.
4213
262k
  return selectAddrModeRegisterOffset(Root);
4214
262k
}
4215
4216
/// Select a "register plus unscaled signed 9-bit immediate" address.  This
4217
/// should only match when there is an offset that is not valid for a scaled
4218
/// immediate addressing mode.  The "Size" argument is the size in bytes of the
4219
/// memory reference, which is needed here to know what is valid for a scaled
4220
/// immediate.
4221
InstructionSelector::ComplexRendererFns
4222
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4223
317k
                                                   unsigned Size) const {
4224
317k
  MachineRegisterInfo &MRI =
4225
317k
      Root.getParent()->getParent()->getParent()->getRegInfo();
4226
317k
4227
317k
  if (!Root.isReg())
4228
0
    return None;
4229
317k
4230
317k
  if (!isBaseWithConstantOffset(Root, MRI))
4231
200k
    return None;
4232
117k
4233
117k
  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4234
117k
  if (!RootDef)
4235
0
    return None;
4236
117k
4237
117k
  MachineOperand &OffImm = RootDef->getOperand(2);
4238
117k
  if (!OffImm.isReg())
4239
0
    return None;
4240
117k
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4241
117k
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4242
0
    return None;
4243
117k
  int64_t RHSC;
4244
117k
  MachineOperand &RHSOp1 = RHS->getOperand(1);
4245
117k
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4246
0
    return None;
4247
117k
  RHSC = RHSOp1.getCImm()->getSExtValue();
4248
117k
4249
117k
  // If the offset is valid as a scaled immediate, don't match here.
4250
117k
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4251
0
    return None;
4252
117k
  if (RHSC >= -256 && RHSC < 256) {
4253
116k
    MachineOperand &Base = RootDef->getOperand(1);
4254
116k
    return {{
4255
116k
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4256
116k
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4257
116k
    }};
4258
116k
  }
4259
1.17k
  return None;
4260
1.17k
}
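
Seen together with selectAddrModeIndexed below, the division of labor is: the scaled uimm12 form wins whenever it applies, and the unscaled simm9 form only matches the leftovers. A standalone sketch; both helpers are illustrative, not LLVM APIs.

#include <cassert>
#include <cstdint>

// Illustrative only: the two offset tests split between
// selectAddrModeIndexed (scaled) and selectAddrModeUnscaled (LDUR-style).
// Scale is log2 of the access size in bytes.
static bool fitsScaled(int64_t Off, unsigned Size, unsigned Scale) {
  return (Off & (Size - 1)) == 0 && Off >= 0 && Off < (0x1000 << Scale);
}
static bool fitsUnscaled(int64_t Off, unsigned Size, unsigned Scale) {
  if (fitsScaled(Off, Size, Scale))
    return false; // the scaled form is preferred when it applies
  return Off >= -256 && Off < 256; // signed 9-bit byte offset
}

int main() {
  assert(fitsUnscaled(-8, 4, 2)); // ldur w0, [x1, #-8]
  assert(!fitsUnscaled(8, 4, 2)); // scaled ldr w0, [x1, #8] wins instead
  return 0;
}
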
4261
4262
/// Select a "register plus scaled unsigned 12-bit immediate" address.  The
4263
/// "Size" argument is the size in bytes of the memory reference, which
4264
/// determines the scale.
4265
InstructionSelector::ComplexRendererFns
4266
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4267
829k
                                                  unsigned Size) const {
4268
829k
  MachineRegisterInfo &MRI =
4269
829k
      Root.getParent()->getParent()->getParent()->getRegInfo();
4270
829k
4271
829k
  if (!Root.isReg())
4272
0
    return None;
4273
829k
4274
829k
  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4275
829k
  if (!RootDef)
4276
0
    return None;
4277
829k
4278
829k
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4279
34.9k
    return {{
4280
34.9k
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4281
34.9k
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4282
34.9k
    }};
4283
34.9k
  }
4284
794k
4285
794k
  if (isBaseWithConstantOffset(Root, MRI)) {
4286
594k
    MachineOperand &LHS = RootDef->getOperand(1);
4287
594k
    MachineOperand &RHS = RootDef->getOperand(2);
4288
594k
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4289
594k
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4290
594k
    if (LHSDef && RHSDef) {
4291
594k
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4292
594k
      unsigned Scale = Log2_32(Size);
4293
594k
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4294
528k
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4295
40.7k
          return {{
4296
40.7k
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4297
40.7k
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4298
40.7k
          }};
4299
487k
4300
487k
        return {{
4301
487k
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4302
487k
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4303
487k
        }};
4304
487k
      }
4305
594k
    }
4306
594k
  }
4307
265k
4308
265k
  // Before falling back to our general case, check if the unscaled
4309
265k
  // instructions can handle this. If so, that's preferable.
4310
265k
  if (selectAddrModeUnscaled(Root, Size).hasValue())
4311
64.5k
    return None;
4312
201k
4313
201k
  return {{
4314
201k
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4315
201k
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4316
201k
  }};
4317
201k
}
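
One detail worth spelling out: the rendered immediate is the byte offset pre-divided by the access size, which is why the lambdas above add RHSC >> Scale rather than RHSC. A short standalone sketch with a worked value; scaledImmField is an illustrative helper.

#include <cassert>
#include <cstdint>

// Illustrative only: the uimm12 field rendered by selectAddrModeIndexed.
// An 8-byte load at byte offset 16 encodes the immediate 2, i.e.
// "ldr x0, [x1, #16]" stores 16 >> 3 in the instruction word.
static int64_t scaledImmField(int64_t ByteOff, unsigned Scale) {
  return ByteOff >> Scale;
}

int main() {
  assert(scaledImmField(16, /*Scale=*/3) == 2);
  return 0;
}
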
4318
4319
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4320
4.02k
                                                const MachineInstr &MI) const {
4321
4.02k
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4322
4.02k
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4323
4.02k
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4324
4.02k
  assert(CstVal && "Expected constant value");
4325
4.02k
  MIB.addImm(CstVal.getValue());
4326
4.02k
}
4327
4328
namespace llvm {
4329
InstructionSelector *
4330
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4331
                                 AArch64Subtarget &Subtarget,
4332
9.10k
                                 AArch64RegisterBankInfo &RBI) {
4333
9.10k
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
4334
9.10k
}
4335
}