Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be.  This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes
    NumBytes = 16;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
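
// Annotation (not in the original source): a sketch of the Cond encodings
// produced above, assuming the usual AArch64 branch operand layouts.
//   b.ne %bb.1          =>  Cond = { NE }
//   cbz  w8, %bb.1      =>  Cond = { -1, AArch64::CBZW, w8 }
//   tbnz w8, #3, %bb.1  =>  Cond = { -1, AArch64::TBNZW, w8, 3 }
// The leading -1 marks a folded compare-and-branch so later consumers
// (reverseBranchCondition, instantiateCondBranch) can tell the forms apart.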

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
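
// Annotation (not in the original source): the range check above works in
// units of 4-byte instructions. For example, with the default
// BCCDisplacementBits of 19, a Bcc can reach byte offsets in
// [-2^18 * 4, (2^18 - 1) * 4], roughly +/-1 MiB, while TB[N]Z with its
// default of 14 bits only reaches about +/-32 KiB.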

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator left is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
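
// Annotation (not in the original source): an illustrative summary of the
// analyzeBranch contract in the common two-terminator case. A block ending in
//   b.eq %bb.1
//   b    %bb.2
// returns false (analyzable) with TBB = %bb.1, FBB = %bb.2 and
// Cond = { EQ }; a lone fall-through conditional branch leaves FBB null.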

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}
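
// Annotation (not in the original source): insertBranch and removeBranch are
// intended to be inverses of each other. A two-way branch inserted above
// costs two 4-byte instructions (BytesAdded = 8), matching the
// BytesRemoved = 8 case in removeBranch.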

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
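
// Annotation (not in the original source): examples of the folds recognized
// above, writing vregs as plain registers for readability.
//   add w1, w0, #1           -> foldable as csinc (CSINCWr), NewVReg = w0
//   orn x1, xzr, x0  (~x0)   -> foldable as csinv (CSINVXr), NewVReg = x0
//   sub w1, wzr, w0  (-w0)   -> foldable as csneg (CSNEGWr), NewVReg = w0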

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
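
// Annotation (not in the original source): for a tbz-style condition such as
// Cond = { -1, AArch64::TBZW, w8, 3 }, the code above first materializes the
// flags with "ands wzr, w8, #(1 << 3)" and then emits
// "csel wDst, wTrue, wFalse, eq" (or a folded csinc/csinv/csneg variant).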

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
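
// Annotation (not in the original source): the shift pair above masks Imm
// down to its low BitSize bits before querying the logical-immediate encoder.
// For instance, 0x00FF00FF (the pattern 0x00FF replicated across 16-bit
// chunks) is encodable as an ORR immediate, while an arbitrary value such as
// 0x12345678 is not, so only the former MOVi32imm is reported as cheap.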

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
    default:
      return false;
    case AArch64::SEH_StackAlloc:
    case AArch64::SEH_SaveFPLR:
    case AArch64::SEH_SaveFPLR_X:
    case AArch64::SEH_SaveReg:
    case AArch64::SEH_SaveReg_X:
    case AArch64::SEH_SaveRegP:
    case AArch64::SEH_SaveRegP_X:
    case AArch64::SEH_SaveFReg:
    case AArch64::SEH_SaveFReg_X:
    case AArch64::SEH_SaveFRegP:
    case AArch64::SEH_SaveFRegP_X:
    case AArch64::SEH_SetFP:
    case AArch64::SEH_AddFP:
    case AArch64::SEH_Nop:
    case AArch64::SEH_PrologEnd:
    case AArch64::SEH_EpilogStart:
    case AArch64::SEH_EpilogEnd:
      return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, offset from the base and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
  // the bases are identical, and the offset of the lower memory access plus
  // its width doesn't overlap the offset of the higher memory access,
  // then the memory accesses are different.
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
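
// Annotation (not in the original source): a worked example of the
// disjointness test above. For "ldr x0, [x9]" (OffsetA = 0, WidthA = 8) and
// "ldr x1, [x9, #8]" (OffsetB = 8, WidthB = 8) with the identical base x9,
// LowOffset + LowWidth = 0 + 8 <= 8 = HighOffset, so the two accesses are
// reported as trivially disjoint.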

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
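
// Annotation (not in the original source): for "subs w0, w1, #0",
// analyzeCompare reports SrcReg = w1, SrcReg2 = 0, CmpMask = ~0 and
// CmpValue = 0; for "subs w0, w1, w2" it reports SrcReg = w1, SrcReg2 = w2
// with CmpValue = 0. Per the FIXMEs above, the immediate is only tracked as
// zero or non-zero.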

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
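
// Annotation (not in the original source): optimizeCompareInstr (below) uses
// this mapping to rewrite, for example, "subs w0, w1, w2" into
// "sub w0, w1, w2" when nobody reads NZCV, since SUBSWrr maps to SUBWrr here.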

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have an "ri" form whose immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in the
  // analyzeCompare function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}
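
// Annotation (not in the original source): step 1 above downgrades a
// flag-setting instruction whose NZCV def is dead, e.g. in pseudo-MIR
//   %2:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
// becomes "%2:gpr32 = SUBWrr %0, %1". Step 2 instead deletes a compare whose
// flags can be produced by an earlier instruction (see substituteCmpToZero).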

/// Get the opcode of the S version of Instr.
/// If Instr is already an S version, its opcode is returned unchanged.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
1302
/// Find a condition code used by the instruction.
1303
/// Returns AArch64CC::Invalid if either the instruction does not use condition
1304
/// codes or we don't optimize CmpInstr in the presence of such instructions.
1305
1.01k
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1306
1.01k
  switch (Instr.getOpcode()) {
1307
1.01k
  default:
1308
109
    return AArch64CC::Invalid;
1309
1.01k
1310
1.01k
  case AArch64::Bcc: {
1311
225
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1312
225
    assert(Idx >= 2);
1313
225
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1314
1.01k
  }
1315
1.01k
1316
1.01k
  case AArch64::CSINVWr:
1317
679
  case AArch64::CSINVXr:
1318
679
  case AArch64::CSINCWr:
1319
679
  case AArch64::CSINCXr:
1320
679
  case AArch64::CSELWr:
1321
679
  case AArch64::CSELXr:
1322
679
  case AArch64::CSNEGWr:
1323
679
  case AArch64::CSNEGXr:
1324
679
  case AArch64::FCSELSrrr:
1325
679
  case AArch64::FCSELDrrr: {
1326
679
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1327
679
    assert(Idx >= 1);
1328
679
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1329
679
  }
1330
1.01k
  }
1331
1.01k
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
}
1384
1385
/// Check if CmpInstr can be substituted by MI.
1386
///
1387
/// CmpInstr can be substituted:
1388
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1389
/// - and, MI and CmpInstr are from the same MachineBB
1390
/// - and, condition flags are not alive in successors of the CmpInstr parent
1391
/// - and, if MI opcode is the S form there must be no defs of flags between
1392
///        MI and CmpInstr
1393
///        or if MI opcode is not the S form there must be neither defs of flags
1394
///        nor uses of flags between MI and CmpInstr.
1395
/// - and  C/V flags are not used after CmpInstr
1396
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1397
1.36k
                                       const TargetRegisterInfo *TRI) {
1398
1.36k
  assert(MI);
1399
1.36k
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1400
1.36k
  assert(CmpInstr);
1401
1.36k
1402
1.36k
  const unsigned CmpOpcode = CmpInstr->getOpcode();
1403
1.36k
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1404
0
    return false;
1405
1.36k
1406
1.36k
  if (MI->getParent() != CmpInstr->getParent())
1407
306
    return false;
1408
1.05k
1409
1.05k
  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1410
2
    return false;
1411
1.05k
1412
1.05k
  AccessKind AccessToCheck = AK_Write;
1413
1.05k
  if (sForm(*MI) != MI->getOpcode())
1414
910
    AccessToCheck = AK_All;
1415
1.05k
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1416
71
    return false;
1417
986
1418
986
  UsedNZCV NZCVUsedAfterCmp;
1419
986
  for (auto I = std::next(CmpInstr->getIterator()),
1420
986
            E = CmpInstr->getParent()->instr_end();
1421
3.39k
       I != E; ++I) {
1422
2.88k
    const MachineInstr &Instr = *I;
1423
2.88k
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1424
1.01k
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1425
1.01k
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1426
109
        return false;
1427
904
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
1428
904
    }
1429
2.88k
1430
2.88k
    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1431
366
      break;
1432
2.77k
  }
1433
986
1434
986
  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1435
986
}
1436
1437
/// Substitute an instruction comparing to zero with another instruction
1438
/// which produces the needed condition flags.
1439
///
1440
/// Return true on success.
1441
bool AArch64InstrInfo::substituteCmpToZero(
1442
    MachineInstr &CmpInstr, unsigned SrcReg,
1443
42.3k
    const MachineRegisterInfo *MRI) const {
1444
42.3k
  assert(MRI);
1445
42.3k
  // Get the unique definition of SrcReg.
1446
42.3k
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1447
42.3k
  if (!MI)
1448
0
    return false;
1449
42.3k
1450
42.3k
  const TargetRegisterInfo *TRI = &getRegisterInfo();
1451
42.3k
1452
42.3k
  unsigned NewOpc = sForm(*MI);
1453
42.3k
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1454
40.9k
    return false;
1455
1.36k
1456
1.36k
  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1457
1.24k
    return false;
1458
125
1459
125
  // Update the instruction to set NZCV.
1460
125
  MI->setDesc(get(NewOpc));
1461
125
  CmpInstr.eraseFromParent();
1462
125
  bool succeeded = UpdateOperandRegClass(*MI);
1463
125
  (void)succeeded;
1464
125
  assert(succeeded && "Some operands reg class are incompatible!");
1465
125
  MI->addRegisterDefined(AArch64::NZCV, TRI);
1466
125
  return true;
1467
125
}
1468
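// Editor's sketch of the rewrite performed above, in hypothetical MIR:
//   %2:gpr32 = SUBWrr %0, %1
//   %3:gpr32 = SUBSWri %2, 0, 0, implicit-def $nzcv   ; CmpInstr
// becomes, after setDesc/eraseFromParent/addRegisterDefined:
//   %2:gpr32 = SUBSWrr %0, %1, implicit-def $nzcv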
1469
2.31M
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1470
2.31M
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1471
2.31M
      MI.getOpcode() != AArch64::CATCHRET)
1472
2.30M
    return false;
1473
3.69k
1474
3.69k
  MachineBasicBlock &MBB = *MI.getParent();
1475
3.69k
  DebugLoc DL = MI.getDebugLoc();
1476
3.69k
1477
3.69k
  if (MI.getOpcode() == AArch64::CATCHRET) {
1478
7
    // Skip to the first instruction before the epilog.
1479
7
    const TargetInstrInfo *TII =
1480
7
      MBB.getParent()->getSubtarget().getInstrInfo();
1481
7
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1482
7
    auto MBBI = MachineBasicBlock::iterator(MI);
1483
7
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1484
51
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1485
51
           FirstEpilogSEH != MBB.begin())
1486
44
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
1487
7
    if (FirstEpilogSEH != MBB.begin())
1488
7
      FirstEpilogSEH = std::next(FirstEpilogSEH);
1489
7
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1490
7
        .addReg(AArch64::X0, RegState::Define)
1491
7
        .addMBB(TargetMBB);
1492
7
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1493
7
        .addReg(AArch64::X0, RegState::Define)
1494
7
        .addReg(AArch64::X0)
1495
7
        .addMBB(TargetMBB)
1496
7
        .addImm(0);
1497
7
    return true;
1498
7
  }
1499
3.68k
1500
3.68k
  unsigned Reg = MI.getOperand(0).getReg();
1501
3.68k
  const GlobalValue *GV =
1502
3.68k
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1503
3.68k
  const TargetMachine &TM = MBB.getParent()->getTarget();
1504
3.68k
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1505
3.68k
  const unsigned char MO_NC = AArch64II::MO_NC;
1506
3.68k
1507
3.68k
  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1508
3.67k
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1509
3.67k
        .addGlobalAddress(GV, 0, OpFlags);
1510
3.67k
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1511
3.67k
        .addReg(Reg, RegState::Kill)
1512
3.67k
        .addImm(0)
1513
3.67k
        .addMemOperand(*MI.memoperands_begin());
1514
3.67k
  } else if (TM.getCodeModel() == CodeModel::Large) {
1515
4
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1516
4
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1517
4
        .addImm(0);
1518
4
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1519
4
        .addReg(Reg, RegState::Kill)
1520
4
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1521
4
        .addImm(16);
1522
4
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1523
4
        .addReg(Reg, RegState::Kill)
1524
4
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1525
4
        .addImm(32);
1526
4
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1527
4
        .addReg(Reg, RegState::Kill)
1528
4
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1529
4
        .addImm(48);
1530
4
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1531
4
        .addReg(Reg, RegState::Kill)
1532
4
        .addImm(0)
1533
4
        .addMemOperand(*MI.memoperands_begin());
1534
13
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1535
0
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1536
0
        .addGlobalAddress(GV, 0, OpFlags);
1537
13
  } else {
1538
13
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1539
13
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1540
13
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1541
13
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1542
13
        .addReg(Reg, RegState::Kill)
1543
13
        .addGlobalAddress(GV, 0, LoFlags)
1544
13
        .addMemOperand(*MI.memoperands_begin());
1545
13
  }
1546
3.68k
1547
3.68k
  MBB.erase(MI);
1548
3.68k
1549
3.68k
  return true;
1550
3.68k
}
1551
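// Editor's sketch of the most common path above, the GOT expansion of
// LOAD_STACK_GUARD (LOADgot is itself lowered to adrp+ldr later;
// relocation syntax is assumed):
//   adrp x8, :got:__stack_chk_guard
//   ldr  x8, [x8, :got_lo12:__stack_chk_guard]   // LOADgot
//   ldr  x8, [x8]                                // trailing LDRXui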
1552
// Return true if this instruction simply sets its single destination register
1553
// to zero. This is equivalent to a register rename of the zero-register.
1554
269k
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1555
269k
  switch (MI.getOpcode()) {
1556
269k
  default:
1557
0
    break;
1558
269k
  case AArch64::MOVZWi:
1559
26
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1560
26
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1561
0
      assert(MI.getDesc().getNumOperands() == 3 &&
1562
0
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1563
0
      return true;
1564
0
    }
1565
26
    break;
1566
115k
  case AArch64::ANDWri: // and Rd, Rzr, #imm
1567
115k
    return MI.getOperand(1).getReg() == AArch64::WZR;
1568
153k
  case AArch64::ANDXri:
1569
153k
    return MI.getOperand(1).getReg() == AArch64::XZR;
1570
26
  case TargetOpcode::COPY:
1571
0
    return MI.getOperand(1).getReg() == AArch64::WZR;
1572
26
  }
1573
26
  return false;
1574
26
}
1575
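// E.g. (editor's note) "and w0, wzr, #0xff" always produces zero no
// matter the immediate, which is why ANDWri/ANDXri with a zero-register
// source are accepted above as renames of WZR/XZR.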
1576
// Return true if this instruction simply renames a general register without
1577
// modifying bits.
1578
33.1M
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1579
33.1M
  switch (MI.getOpcode()) {
1580
33.1M
  default:
1581
5.43M
    break;
1582
33.1M
  case TargetOpcode::COPY: {
1583
27.6M
    // GPR32 copies will be lowered to ORRXrs
1584
27.6M
    unsigned DstReg = MI.getOperand(0).getReg();
1585
27.6M
    return (AArch64::GPR32RegClass.contains(DstReg) ||
1586
27.6M
            AArch64::GPR64RegClass.contains(DstReg));
1587
33.1M
  }
1588
33.1M
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1589
0
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
1590
0
      assert(MI.getDesc().getNumOperands() == 4 &&
1591
0
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1592
0
      return true;
1593
0
    }
1594
0
    break;
1595
0
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1596
0
    if (MI.getOperand(2).getImm() == 0) {
1597
0
      assert(MI.getDesc().getNumOperands() == 4 &&
1598
0
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1599
0
      return true;
1600
0
    }
1601
0
    break;
1602
5.43M
  }
1603
5.43M
  return false;
1604
5.43M
}
1605
1606
// Return true if this instruction simply renames a floating-point register without
1607
// modifying bits.
1608
12.8M
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1609
12.8M
  switch (MI.getOpcode()) {
1610
12.8M
  default:
1611
5.43M
    break;
1612
12.8M
  case TargetOpcode::COPY: {
1613
7.40M
    // FPR64 copies will be lowered to ORR.16b
1614
7.40M
    unsigned DstReg = MI.getOperand(0).getReg();
1615
7.40M
    return (AArch64::FPR64RegClass.contains(DstReg) ||
1616
7.40M
            AArch64::FPR128RegClass.contains(DstReg));
1617
12.8M
  }
1618
12.8M
  case AArch64::ORRv16i8:
1619
4.80k
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1620
0
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1621
0
             "invalid ORRv16i8 operands");
1622
0
      return true;
1623
0
    }
1624
4.80k
    break;
1625
5.44M
  }
1626
5.44M
  return false;
1627
5.44M
}
1628
1629
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1630
12.2M
                                               int &FrameIndex) const {
1631
12.2M
  switch (MI.getOpcode()) {
1632
12.2M
  default:
1633
11.2M
    break;
1634
12.2M
  case AArch64::LDRWui:
1635
919k
  case AArch64::LDRXui:
1636
919k
  case AArch64::LDRBui:
1637
919k
  case AArch64::LDRHui:
1638
919k
  case AArch64::LDRSui:
1639
919k
  case AArch64::LDRDui:
1640
919k
  case AArch64::LDRQui:
1641
919k
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1642
919k
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1643
235k
      FrameIndex = MI.getOperand(1).getIndex();
1644
235k
      return MI.getOperand(0).getReg();
1645
235k
    }
1646
684k
    break;
1647
11.9M
  }
1648
11.9M
1649
11.9M
  return 0;
1650
11.9M
}
1651
1652
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1653
5.30M
                                              int &FrameIndex) const {
1654
5.30M
  switch (MI.getOpcode()) {
1655
5.30M
  default:
1656
4.97M
    break;
1657
5.30M
  case AArch64::STRWui:
1658
330k
  case AArch64::STRXui:
1659
330k
  case AArch64::STRBui:
1660
330k
  case AArch64::STRHui:
1661
330k
  case AArch64::STRSui:
1662
330k
  case AArch64::STRDui:
1663
330k
  case AArch64::STRQui:
1664
330k
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1665
330k
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1666
34.1k
      FrameIndex = MI.getOperand(1).getIndex();
1667
34.1k
      return MI.getOperand(0).getReg();
1668
34.1k
    }
1669
296k
    break;
1670
5.27M
  }
1671
5.27M
  return 0;
1672
5.27M
}
1673
1674
/// Check all MachineMemOperands for a hint to suppress pairing.
1675
13.9M
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1676
13.9M
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1677
8.27M
    return MMO->getFlags() & MOSuppressPair;
1678
8.27M
  });
1679
13.9M
}
1680
1681
/// Set a flag on the first MachineMemOperand to suppress pairing.
1682
3.57k
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1683
3.57k
  if (MI.memoperands_empty())
1684
0
    return;
1685
3.57k
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1686
3.57k
}
1687
1688
/// Check all MachineMemOperands for a hint that the load/store is strided.
1689
133
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1690
133
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1691
43
    return MMO->getFlags() & MOStridedAccess;
1692
43
  });
1693
133
}
1694
1695
19.1M
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1696
19.1M
  switch (Opc) {
1697
19.1M
  default:
1698
18.3M
    return false;
1699
19.1M
  case AArch64::STURSi:
1700
817k
  case AArch64::STURDi:
1701
817k
  case AArch64::STURQi:
1702
817k
  case AArch64::STURBBi:
1703
817k
  case AArch64::STURHHi:
1704
817k
  case AArch64::STURWi:
1705
817k
  case AArch64::STURXi:
1706
817k
  case AArch64::LDURSi:
1707
817k
  case AArch64::LDURDi:
1708
817k
  case AArch64::LDURQi:
1709
817k
  case AArch64::LDURWi:
1710
817k
  case AArch64::LDURXi:
1711
817k
  case AArch64::LDURSWi:
1712
817k
  case AArch64::LDURHHi:
1713
817k
  case AArch64::LDURBBi:
1714
817k
  case AArch64::LDURSBWi:
1715
817k
  case AArch64::LDURSHWi:
1716
817k
    return true;
1717
19.1M
  }
1718
19.1M
}
1719
1720
1.45M
Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1721
1.45M
  switch (Opc) {
1722
1.45M
  default: return {};
1723
1.45M
  case AArch64::PRFMui: return AArch64::PRFUMi;
1724
1.45M
  case AArch64::LDRXui: return AArch64::LDURXi;
1725
1.45M
  case AArch64::LDRWui: return AArch64::LDURWi;
1726
1.45M
  case AArch64::LDRBui: return AArch64::LDURBi;
1727
1.45M
  case AArch64::LDRHui: return AArch64::LDURHi;
1728
1.45M
  case AArch64::LDRSui: return AArch64::LDURSi;
1729
1.45M
  case AArch64::LDRDui: return AArch64::LDURDi;
1730
1.45M
  case AArch64::LDRQui: return AArch64::LDURQi;
1731
1.45M
  case AArch64::LDRBBui: return AArch64::LDURBBi;
1732
1.45M
  case AArch64::LDRHHui: return AArch64::LDURHHi;
1733
1.45M
  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1734
1.45M
  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1735
1.45M
  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1736
1.45M
  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1737
1.45M
  case AArch64::LDRSWui: return AArch64::LDURSWi;
1738
1.45M
  case AArch64::STRXui: return AArch64::STURXi;
1739
1.45M
  case AArch64::STRWui: return AArch64::STURWi;
1740
1.45M
  case AArch64::STRBui: return AArch64::STURBi;
1741
1.45M
  case AArch64::STRHui: return AArch64::STURHi;
1742
1.45M
  case AArch64::STRSui: return AArch64::STURSi;
1743
1.45M
  case AArch64::STRDui: return AArch64::STURDi;
1744
1.45M
  case AArch64::STRQui: return AArch64::STURQi;
1745
1.45M
  case AArch64::STRBBui: return AArch64::STURBBi;
1746
1.45M
  case AArch64::STRHHui: return AArch64::STURHHi;
1747
1.45M
  }
1748
1.45M
}
1749
1750
1.45M
unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1751
1.45M
  switch (Opc) {
1752
1.45M
  default:
1753
1.45M
    return 2;
1754
1.45M
  case AArch64::LDPXi:
1755
24
  case AArch64::LDPDi:
1756
24
  case AArch64::STPXi:
1757
24
  case AArch64::STPDi:
1758
24
  case AArch64::LDNPXi:
1759
24
  case AArch64::LDNPDi:
1760
24
  case AArch64::STNPXi:
1761
24
  case AArch64::STNPDi:
1762
24
  case AArch64::LDPQi:
1763
24
  case AArch64::STPQi:
1764
24
  case AArch64::LDNPQi:
1765
24
  case AArch64::STNPQi:
1766
24
  case AArch64::LDPWi:
1767
24
  case AArch64::LDPSi:
1768
24
  case AArch64::STPWi:
1769
24
  case AArch64::STPSi:
1770
24
  case AArch64::LDNPWi:
1771
24
  case AArch64::LDNPSi:
1772
24
  case AArch64::STNPWi:
1773
24
  case AArch64::STNPSi:
1774
24
  case AArch64::LDG:
1775
24
  case AArch64::STGPi:
1776
24
    return 3;
1777
24
  case AArch64::ADDG:
1778
21
  case AArch64::STGOffset:
1779
21
    return 2;
1780
1.45M
  }
1781
1.45M
}
1782
1783
30.3M
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1784
30.3M
  switch (MI.getOpcode()) {
1785
30.3M
  default:
1786
25.8M
    return false;
1787
30.3M
  // Scaled instructions.
1788
30.3M
  case AArch64::STRSui:
1789
4.50M
  case AArch64::STRDui:
1790
4.50M
  case AArch64::STRQui:
1791
4.50M
  case AArch64::STRXui:
1792
4.50M
  case AArch64::STRWui:
1793
4.50M
  case AArch64::LDRSui:
1794
4.50M
  case AArch64::LDRDui:
1795
4.50M
  case AArch64::LDRQui:
1796
4.50M
  case AArch64::LDRXui:
1797
4.50M
  case AArch64::LDRWui:
1798
4.50M
  case AArch64::LDRSWui:
1799
4.50M
  // Unscaled instructions.
1800
4.50M
  case AArch64::STURSi:
1801
4.50M
  case AArch64::STURDi:
1802
4.50M
  case AArch64::STURQi:
1803
4.50M
  case AArch64::STURWi:
1804
4.50M
  case AArch64::STURXi:
1805
4.50M
  case AArch64::LDURSi:
1806
4.50M
  case AArch64::LDURDi:
1807
4.50M
  case AArch64::LDURQi:
1808
4.50M
  case AArch64::LDURWi:
1809
4.50M
  case AArch64::LDURXi:
1810
4.50M
  case AArch64::LDURSWi:
1811
4.50M
    return true;
1812
30.3M
  }
1813
30.3M
}
1814
1815
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1816
42.5k
                                                   bool &Is64Bit) {
1817
42.5k
  switch (Opc) {
1818
42.5k
  default:
1819
0
    llvm_unreachable("Opcode has no flag setting equivalent!");
1820
42.5k
  // 32-bit cases:
1821
42.5k
  case AArch64::ADDWri:
1822
15
    Is64Bit = false;
1823
15
    return AArch64::ADDSWri;
1824
42.5k
  case AArch64::ADDWrr:
1825
6.42k
    Is64Bit = false;
1826
6.42k
    return AArch64::ADDSWrr;
1827
42.5k
  case AArch64::ADDWrs:
1828
0
    Is64Bit = false;
1829
0
    return AArch64::ADDSWrs;
1830
42.5k
  case AArch64::ADDWrx:
1831
0
    Is64Bit = false;
1832
0
    return AArch64::ADDSWrx;
1833
42.5k
  case AArch64::ANDWri:
1834
3.12k
    Is64Bit = false;
1835
3.12k
    return AArch64::ANDSWri;
1836
42.5k
  case AArch64::ANDWrr:
1837
6.24k
    Is64Bit = false;
1838
6.24k
    return AArch64::ANDSWrr;
1839
42.5k
  case AArch64::ANDWrs:
1840
0
    Is64Bit = false;
1841
0
    return AArch64::ANDSWrs;
1842
42.5k
  case AArch64::BICWrr:
1843
36
    Is64Bit = false;
1844
36
    return AArch64::BICSWrr;
1845
42.5k
  case AArch64::BICWrs:
1846
0
    Is64Bit = false;
1847
0
    return AArch64::BICSWrs;
1848
42.5k
  case AArch64::SUBWri:
1849
0
    Is64Bit = false;
1850
0
    return AArch64::SUBSWri;
1851
42.5k
  case AArch64::SUBWrr:
1852
0
    Is64Bit = false;
1853
0
    return AArch64::SUBSWrr;
1854
42.5k
  case AArch64::SUBWrs:
1855
0
    Is64Bit = false;
1856
0
    return AArch64::SUBSWrs;
1857
42.5k
  case AArch64::SUBWrx:
1858
0
    Is64Bit = false;
1859
0
    return AArch64::SUBSWrx;
1860
42.5k
  // 64-bit cases:
1861
42.5k
  case AArch64::ADDXri:
1862
118
    Is64Bit = true;
1863
118
    return AArch64::ADDSXri;
1864
42.5k
  case AArch64::ADDXrr:
1865
17.3k
    Is64Bit = true;
1866
17.3k
    return AArch64::ADDSXrr;
1867
42.5k
  case AArch64::ADDXrs:
1868
3
    Is64Bit = true;
1869
3
    return AArch64::ADDSXrs;
1870
42.5k
  case AArch64::ADDXrx:
1871
0
    Is64Bit = true;
1872
0
    return AArch64::ADDSXrx;
1873
42.5k
  case AArch64::ANDXri:
1874
5.93k
    Is64Bit = true;
1875
5.93k
    return AArch64::ANDSXri;
1876
42.5k
  case AArch64::ANDXrr:
1877
3.30k
    Is64Bit = true;
1878
3.30k
    return AArch64::ANDSXrr;
1879
42.5k
  case AArch64::ANDXrs:
1880
0
    Is64Bit = true;
1881
0
    return AArch64::ANDSXrs;
1882
42.5k
  case AArch64::BICXrr:
1883
9
    Is64Bit = true;
1884
9
    return AArch64::BICSXrr;
1885
42.5k
  case AArch64::BICXrs:
1886
0
    Is64Bit = true;
1887
0
    return AArch64::BICSXrs;
1888
42.5k
  case AArch64::SUBXri:
1889
0
    Is64Bit = true;
1890
0
    return AArch64::SUBSXri;
1891
42.5k
  case AArch64::SUBXrr:
1892
0
    Is64Bit = true;
1893
0
    return AArch64::SUBSXrr;
1894
42.5k
  case AArch64::SUBXrs:
1895
0
    Is64Bit = true;
1896
0
    return AArch64::SUBSXrs;
1897
42.5k
  case AArch64::SUBXrx:
1898
0
    Is64Bit = true;
1899
0
    return AArch64::SUBSXrx;
1900
42.5k
  }
1901
42.5k
}
1902
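// Typical use (editor's illustrative sketch, not from this file):
//   bool Is64Bit;
//   unsigned NewOpc = convertToFlagSettingOpc(AArch64::ADDWrr, Is64Bit);
//   // NewOpc == AArch64::ADDSWrr, Is64Bit == false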
1903
// Is this a candidate for ld/st merging or pairing?  For example, we don't
1904
// touch volatiles or load/stores that have a hint to avoid pair formation.
1905
4.40M
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1906
4.40M
  // If this is a volatile load/store, don't mess with it.
1907
4.40M
  if (MI.hasOrderedMemoryRef())
1908
341k
    return false;
1909
4.05M
1910
4.05M
  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1911
4.05M
  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1912
4.05M
         "Expected a reg or frame index operand.");
1913
4.05M
  if (!MI.getOperand(2).isImm())
1914
79.4k
    return false;
1915
3.98M
1916
3.98M
  // Can't merge/pair if the instruction modifies the base register.
1917
3.98M
  // e.g., ldr x0, [x0]
1918
3.98M
  // This case will never occur with an FI base.
1919
3.98M
  if (MI.getOperand(1).isReg()) {
1920
3.93M
    unsigned BaseReg = MI.getOperand(1).getReg();
1921
3.93M
    const TargetRegisterInfo *TRI = &getRegisterInfo();
1922
3.93M
    if (MI.modifiesRegister(BaseReg, TRI))
1923
202k
      return false;
1924
3.77M
  }
1925
3.77M
1926
3.77M
  // Check if this load/store has a hint to avoid pair formation.
1927
3.77M
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1928
3.77M
  if (isLdStPairSuppressed(MI))
1929
7.85k
    return false;
1930
3.76M
1931
3.76M
  // On some CPUs quad load/store pairs are slower than two single load/stores.
1932
3.76M
  if (Subtarget.isPaired128Slow()) {
1933
124
    switch (MI.getOpcode()) {
1934
124
    default:
1935
26
      break;
1936
124
    case AArch64::LDURQi:
1937
98
    case AArch64::STURQi:
1938
98
    case AArch64::LDRQui:
1939
98
    case AArch64::STRQui:
1940
98
      return false;
1941
3.76M
    }
1942
3.76M
  }
1943
3.76M
1944
3.76M
  return true;
1945
3.76M
}
1946
1947
bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1948
                                          const MachineOperand *&BaseOp,
1949
                                          int64_t &Offset,
1950
2.23M
                                          const TargetRegisterInfo *TRI) const {
1951
2.23M
  unsigned Width;
1952
2.23M
  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1953
2.23M
}
1954
1955
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1956
    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
1957
29.6M
    unsigned &Width, const TargetRegisterInfo *TRI) const {
1958
29.6M
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1959
29.6M
  // Handle only loads/stores with base register followed by immediate offset.
1960
29.6M
  if (LdSt.getNumExplicitOperands() == 3) {
1961
28.5M
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1962
28.5M
    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1963
28.5M
        !LdSt.getOperand(2).isImm())
1964
102k
      return false;
1965
1.17M
  } else if (LdSt.getNumExplicitOperands() == 4) {
1966
115k
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1967
115k
    if (!LdSt.getOperand(1).isReg() ||
1968
115k
        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1969
115k
        !LdSt.getOperand(3).isImm())
1970
2.19k
      return false;
1971
1.06M
  } else
1972
1.06M
    return false;
1973
28.5M
1974
28.5M
  // Get the scaling factor for the instruction and set the width for the
1975
28.5M
  // instruction.
1976
28.5M
  unsigned Scale = 0;
1977
28.5M
  int64_t Dummy1, Dummy2;
1978
28.5M
1979
28.5M
  // If this returns false, then it's an instruction we don't want to handle.
1980
28.5M
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1981
14.9k
    return false;
1982
28.5M
1983
28.5M
  // Compute the offset. Offset is calculated as the immediate operand
1984
28.5M
  // multiplied by the scaling factor. Unscaled instructions have scaling factor
1985
28.5M
  // set to 1.
1986
28.5M
  if (LdSt.getNumExplicitOperands() == 3) {
1987
28.4M
    BaseOp = &LdSt.getOperand(1);
1988
28.4M
    Offset = LdSt.getOperand(2).getImm() * Scale;
1989
28.4M
  } else {
1990
97.9k
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1991
97.9k
    BaseOp = &LdSt.getOperand(2);
1992
97.9k
    Offset = LdSt.getOperand(3).getImm() * Scale;
1993
97.9k
  }
1994
28.5M
1995
28.5M
  assert((BaseOp->isReg() || BaseOp->isFI()) &&
1996
28.5M
         "getMemOperandWithOffset only supports base "
1997
28.5M
         "operands of type register or frame index.");
1998
28.5M
1999
28.5M
  return true;
2000
28.5M
}
2001
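// Worked example (editor's note): for "ldr x1, [x0, #24]" the opcode is
// LDRXui with immediate operand 3; getMemOpInfo reports Scale = 8, so
// the code above yields BaseOp = x0, Offset = 3 * 8 = 24 bytes, and
// Width = 8.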
2002
MachineOperand &
2003
5
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2004
5
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2005
5
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2006
5
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2007
5
  return OfsOp;
2008
5
}
2009
2010
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2011
                                    unsigned &Width, int64_t &MinOffset,
2012
30.2M
                                    int64_t &MaxOffset) {
2013
30.2M
  switch (Opcode) {
2014
30.2M
  // Not a memory operation or something we want to handle.
2015
30.2M
  default:
2016
14.9k
    Scale = Width = 0;
2017
14.9k
    MinOffset = MaxOffset = 0;
2018
14.9k
    return false;
2019
30.2M
  case AArch64::STRWpost:
2020
6.73k
  case AArch64::LDRWpost:
2021
6.73k
    Width = 32;
2022
6.73k
    Scale = 4;
2023
6.73k
    MinOffset = -256;
2024
6.73k
    MaxOffset = 255;
2025
6.73k
    break;
2026
244k
  case AArch64::LDURQi:
2027
244k
  case AArch64::STURQi:
2028
244k
    Width = 16;
2029
244k
    Scale = 1;
2030
244k
    MinOffset = -256;
2031
244k
    MaxOffset = 255;
2032
244k
    break;
2033
356k
  case AArch64::PRFUMi:
2034
356k
  case AArch64::LDURXi:
2035
356k
  case AArch64::LDURDi:
2036
356k
  case AArch64::STURXi:
2037
356k
  case AArch64::STURDi:
2038
356k
    Width = 8;
2039
356k
    Scale = 1;
2040
356k
    MinOffset = -256;
2041
356k
    MaxOffset = 255;
2042
356k
    break;
2043
1.92M
  case AArch64::LDURWi:
2044
1.92M
  case AArch64::LDURSi:
2045
1.92M
  case AArch64::LDURSWi:
2046
1.92M
  case AArch64::STURWi:
2047
1.92M
  case AArch64::STURSi:
2048
1.92M
    Width = 4;
2049
1.92M
    Scale = 1;
2050
1.92M
    MinOffset = -256;
2051
1.92M
    MaxOffset = 255;
2052
1.92M
    break;
2053
1.92M
  case AArch64::LDURHi:
2054
15.3k
  case AArch64::LDURHHi:
2055
15.3k
  case AArch64::LDURSHXi:
2056
15.3k
  case AArch64::LDURSHWi:
2057
15.3k
  case AArch64::STURHi:
2058
15.3k
  case AArch64::STURHHi:
2059
15.3k
    Width = 2;
2060
15.3k
    Scale = 1;
2061
15.3k
    MinOffset = -256;
2062
15.3k
    MaxOffset = 255;
2063
15.3k
    break;
2064
70.9k
  case AArch64::LDURBi:
2065
70.9k
  case AArch64::LDURBBi:
2066
70.9k
  case AArch64::LDURSBXi:
2067
70.9k
  case AArch64::LDURSBWi:
2068
70.9k
  case AArch64::STURBi:
2069
70.9k
  case AArch64::STURBBi:
2070
70.9k
    Width = 1;
2071
70.9k
    Scale = 1;
2072
70.9k
    MinOffset = -256;
2073
70.9k
    MaxOffset = 255;
2074
70.9k
    break;
2075
70.9k
  case AArch64::LDPQi:
2076
2.51k
  case AArch64::LDNPQi:
2077
2.51k
  case AArch64::STPQi:
2078
2.51k
  case AArch64::STNPQi:
2079
2.51k
    Scale = 16;
2080
2.51k
    Width = 32;
2081
2.51k
    MinOffset = -64;
2082
2.51k
    MaxOffset = 63;
2083
2.51k
    break;
2084
14.6M
  case AArch64::LDRQui:
2085
14.6M
  case AArch64::STRQui:
2086
14.6M
    Scale = Width = 16;
2087
14.6M
    MinOffset = 0;
2088
14.6M
    MaxOffset = 4095;
2089
14.6M
    break;
2090
14.6M
  case AArch64::LDPXi:
2091
79.0k
  case AArch64::LDPDi:
2092
79.0k
  case AArch64::LDNPXi:
2093
79.0k
  case AArch64::LDNPDi:
2094
79.0k
  case AArch64::STPXi:
2095
79.0k
  case AArch64::STPDi:
2096
79.0k
  case AArch64::STNPXi:
2097
79.0k
  case AArch64::STNPDi:
2098
79.0k
    Scale = 8;
2099
79.0k
    Width = 16;
2100
79.0k
    MinOffset = -64;
2101
79.0k
    MaxOffset = 63;
2102
79.0k
    break;
2103
4.54M
  case AArch64::PRFMui:
2104
4.54M
  case AArch64::LDRXui:
2105
4.54M
  case AArch64::LDRDui:
2106
4.54M
  case AArch64::STRXui:
2107
4.54M
  case AArch64::STRDui:
2108
4.54M
    Scale = Width = 8;
2109
4.54M
    MinOffset = 0;
2110
4.54M
    MaxOffset = 4095;
2111
4.54M
    break;
2112
4.54M
  case AArch64::LDPWi:
2113
9.64k
  case AArch64::LDPSi:
2114
9.64k
  case AArch64::LDNPWi:
2115
9.64k
  case AArch64::LDNPSi:
2116
9.64k
  case AArch64::STPWi:
2117
9.64k
  case AArch64::STPSi:
2118
9.64k
  case AArch64::STNPWi:
2119
9.64k
  case AArch64::STNPSi:
2120
9.64k
    Scale = 4;
2121
9.64k
    Width = 8;
2122
9.64k
    MinOffset = -64;
2123
9.64k
    MaxOffset = 63;
2124
9.64k
    break;
2125
2.87M
  case AArch64::LDRWui:
2126
2.87M
  case AArch64::LDRSui:
2127
2.87M
  case AArch64::LDRSWui:
2128
2.87M
  case AArch64::STRWui:
2129
2.87M
  case AArch64::STRSui:
2130
2.87M
    Scale = Width = 4;
2131
2.87M
    MinOffset = 0;
2132
2.87M
    MaxOffset = 4095;
2133
2.87M
    break;
2134
4.12M
  case AArch64::LDRHui:
2135
4.12M
  case AArch64::LDRHHui:
2136
4.12M
  case AArch64::LDRSHWui:
2137
4.12M
  case AArch64::LDRSHXui:
2138
4.12M
  case AArch64::STRHui:
2139
4.12M
  case AArch64::STRHHui:
2140
4.12M
    Scale = Width = 2;
2141
4.12M
    MinOffset = 0;
2142
4.12M
    MaxOffset = 4095;
2143
4.12M
    break;
2144
4.12M
  case AArch64::LDRBui:
2145
1.27M
  case AArch64::LDRBBui:
2146
1.27M
  case AArch64::LDRSBWui:
2147
1.27M
  case AArch64::LDRSBXui:
2148
1.27M
  case AArch64::STRBui:
2149
1.27M
  case AArch64::STRBBui:
2150
1.27M
    Scale = Width = 1;
2151
1.27M
    MinOffset = 0;
2152
1.27M
    MaxOffset = 4095;
2153
1.27M
    break;
2154
1.27M
  case AArch64::ADDG:
2155
18
  case AArch64::TAGPstack:
2156
18
    Scale = 16;
2157
18
    Width = 0;
2158
18
    MinOffset = 0;
2159
18
    MaxOffset = 63;
2160
18
    break;
2161
66
  case AArch64::LDG:
2162
66
  case AArch64::STGOffset:
2163
66
  case AArch64::STZGOffset:
2164
66
    Scale = Width = 16;
2165
66
    MinOffset = -256;
2166
66
    MaxOffset = 255;
2167
66
    break;
2168
66
  case AArch64::ST2GOffset:
2169
39
  case AArch64::STZ2GOffset:
2170
39
    Scale = 16;
2171
39
    Width = 32;
2172
39
    MinOffset = -256;
2173
39
    MaxOffset = 255;
2174
39
    break;
2175
39
  case AArch64::STGPi:
2176
14
    Scale = Width = 16;
2177
14
    MinOffset = -64;
2178
14
    MaxOffset = 63;
2179
14
    break;
2180
30.2M
  }
2181
30.2M
2182
30.2M
  return true;
2183
30.2M
}
2184
2185
15.9k
static unsigned getOffsetStride(unsigned Opc) {
2186
15.9k
  switch (Opc) {
2187
15.9k
  default:
2188
491
    return 0;
2189
15.9k
  case AArch64::LDURQi:
2190
3.81k
  case AArch64::STURQi:
2191
3.81k
    return 16;
2192
3.81k
  case AArch64::LDURXi:
2193
1.41k
  case AArch64::LDURDi:
2194
1.41k
  case AArch64::STURXi:
2195
1.41k
  case AArch64::STURDi:
2196
1.41k
    return 8;
2197
10.1k
  case AArch64::LDURWi:
2198
10.1k
  case AArch64::LDURSi:
2199
10.1k
  case AArch64::LDURSWi:
2200
10.1k
  case AArch64::STURWi:
2201
10.1k
  case AArch64::STURSi:
2202
10.1k
    return 4;
2203
15.9k
  }
2204
15.9k
}
2205
2206
// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2207
// scaled.
2208
15.4k
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2209
15.4k
  unsigned OffsetStride = getOffsetStride(Opc);
2210
15.4k
  if (OffsetStride == 0)
2211
0
    return false;
2212
15.4k
  // If the byte-offset isn't a multiple of the stride, we can't scale this
2213
15.4k
  // offset.
2214
15.4k
  if (Offset % OffsetStride != 0)
2215
9.64k
    return false;
2216
5.77k
2217
5.77k
  // Convert the byte-offset used by unscaled into an "element" offset used
2218
5.77k
  // by the scaled pair load/store instructions.
2219
5.77k
  Offset /= OffsetStride;
2220
5.77k
  return true;
2221
5.77k
}
2222
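// Worked example (editor's note): LDURXi has stride 8, so a byte offset
// of 24 scales to element offset 3, while a byte offset of 20 is
// rejected because 20 % 8 != 0.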
2223
// Unscale the scaled offsets. Returns false if the scaled offset can't be
2224
// unscaled.
2225
491
static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2226
491
  unsigned OffsetStride = getOffsetStride(Opc);
2227
491
  if (OffsetStride == 0)
2228
491
    return false;
2229
0
2230
0
  // Convert the "element" offset used by scaled pair load/store instructions
2231
0
  // into the byte-offset used by unscaled.
2232
0
  Offset *= OffsetStride;
2233
0
  return true;
2234
0
}
2235
2236
360k
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2237
360k
  if (FirstOpc == SecondOpc)
2238
286k
    return true;
2239
74.5k
  // We can also pair sign-ext and zero-ext instructions.
2240
74.5k
  switch (FirstOpc) {
2241
74.5k
  default:
2242
63.6k
    return false;
2243
74.5k
  case AArch64::LDRWui:
2244
6.45k
  case AArch64::LDURWi:
2245
6.45k
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2246
6.45k
  case AArch64::LDRSWui:
2247
4.43k
  case AArch64::LDURSWi:
2248
4.43k
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2249
0
  }
2250
0
  // These instructions can't be paired based on their opcodes.
2251
0
  return false;
2252
0
}
2253
2254
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2255
                            int64_t Offset1, unsigned Opcode1, int FI2,
2256
21.0k
                            int64_t Offset2, unsigned Opcode2) {
2257
21.0k
  // Accesses through fixed stack object frame indices may access a different
2258
21.0k
  // fixed stack slot. Check that the object offsets + offsets match.
2259
21.0k
  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2260
491
    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2261
491
    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2262
491
    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2263
491
    // Get the byte-offset from the object offset.
2264
491
    if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2265
491
      return false;
2266
0
    ObjectOffset1 += Offset1;
2267
0
    ObjectOffset2 += Offset2;
2268
0
    // Get the "element" index in the object.
2269
0
    if (!scaleOffset(Opcode1, ObjectOffset1) ||
2270
0
        !scaleOffset(Opcode2, ObjectOffset2))
2271
0
      return false;
2272
0
    return ObjectOffset1 + 1 == ObjectOffset2;
2273
0
  }
2274
20.5k
2275
20.5k
  return FI1 == FI2;
2276
20.5k
}
2277
2278
/// Detect opportunities for ldp/stp formation.
2279
///
2280
/// Only called for LdSt for which getMemOperandWithOffset returns true.
2281
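/// For instance (editor's illustrative example), with a shared base:
///   ldr x0, [x1]        ; LDRXui, scaled offset 0
///   ldr x2, [x1, #8]    ; LDRXui, scaled offset 1
/// Offset1 + 1 == Offset2 holds below, so the scheduler may place the
/// loads back-to-back and later form "ldp x0, x2, [x1]".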
bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
2282
                                           const MachineOperand &BaseOp2,
2283
779k
                                           unsigned NumLoads) const {
2284
779k
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2285
779k
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2286
779k
  if (BaseOp1.getType() != BaseOp2.getType())
2287
9.80k
    return false;
2288
769k
2289
769k
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2290
769k
         "Only base registers and frame indices are supported.");
2291
769k
2292
769k
  // Check for both base regs and base FI.
2293
769k
  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2294
161k
    return false;
2295
608k
2296
608k
  // Only cluster up to a single pair.
2297
608k
  if (NumLoads > 1)
2298
138k
    return false;
2299
469k
2300
469k
  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2301
109k
    return false;
2302
360k
2303
360k
  // Can we pair these instructions based on their opcodes?
2304
360k
  unsigned FirstOpc = FirstLdSt.getOpcode();
2305
360k
  unsigned SecondOpc = SecondLdSt.getOpcode();
2306
360k
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2307
68.1k
    return false;
2308
292k
2309
292k
  // Can't merge volatiles or load/stores that have a hint to avoid pair
2310
292k
  // formation, for example.
2311
292k
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2312
292k
      !isCandidateToMergeOrPair(SecondLdSt))
2313
986
    return false;
2314
291k
2315
291k
  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2316
291k
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2317
291k
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2318
9.61k
    return false;
2319
281k
2320
281k
  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2321
281k
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2322
22
    return false;
2323
281k
2324
281k
  // Pairwise instructions have a 7-bit signed offset field.
2325
281k
  if (Offset1 > 63 || Offset1 < -64)
2326
55.0k
    return false;
2327
226k
2328
226k
  // The caller should already have ordered First/SecondLdSt by offset.
2329
226k
  // Note: except for non-equal frame index bases
2330
226k
  if (BaseOp1.isFI()) {
2331
21.0k
    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2332
21.0k
           "Caller should have ordered offsets.");
2333
21.0k
2334
21.0k
    const MachineFrameInfo &MFI =
2335
21.0k
        FirstLdSt.getParent()->getParent()->getFrameInfo();
2336
21.0k
    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2337
21.0k
                           BaseOp2.getIndex(), Offset2, SecondOpc);
2338
21.0k
  }
2339
205k
2340
205k
  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2341
205k
         "Caller should have ordered offsets.");
2342
205k
2343
205k
  return Offset1 + 1 == Offset2;
2344
205k
}
2345
2346
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2347
                                            unsigned Reg, unsigned SubIdx,
2348
                                            unsigned State,
2349
95
                                            const TargetRegisterInfo *TRI) {
2350
95
  if (!SubIdx)
2351
0
    return MIB.addReg(Reg, State);
2352
95
2353
95
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
2354
95
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2355
0
  return MIB.addReg(Reg, State, SubIdx);
2356
0
}
2357
2358
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2359
11
                                        unsigned NumRegs) {
2360
11
  // We really want the positive remainder mod 32 here, that happens to be
2361
11
  // easily obtainable with a mask.
2362
11
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2363
11
}
2364
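// Worked example (editor's note): copying D1_D2_D3 from D0_D1_D2 gives
// encodings DestReg - SrcReg == 1 and ((1) & 0x1f) = 1 < 3, so copying
// dsub0 first would clobber source lanes D1/D2 before they are read;
// copyPhysRegTuple below therefore iterates the sub-registers backwards.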
2365
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2366
                                        MachineBasicBlock::iterator I,
2367
                                        const DebugLoc &DL, unsigned DestReg,
2368
                                        unsigned SrcReg, bool KillSrc,
2369
                                        unsigned Opcode,
2370
11
                                        ArrayRef<unsigned> Indices) const {
2371
11
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2372
11
  const TargetRegisterInfo *TRI = &getRegisterInfo();
2373
11
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2374
11
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2375
11
  unsigned NumRegs = Indices.size();
2376
11
2377
11
  int SubReg = 0, End = NumRegs, Incr = 1;
2378
11
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2379
5
    SubReg = NumRegs - 1;
2380
5
    End = -1;
2381
5
    Incr = -1;
2382
5
  }
2383
11
2384
40
  for (; SubReg != End; SubReg += Incr) {
2385
29
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2386
29
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2387
29
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2388
29
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2389
29
  }
2390
11
}
2391
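// E.g. (editor's sketch) a QQ pair copy with Opcode ORRv16i8 emits one
//   orr vN.16b, vM.16b, vM.16b
// per qsub lane, in reverse lane order whenever the tuples overlap.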
2392
void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2393
                                       MachineBasicBlock::iterator I,
2394
                                       DebugLoc DL, unsigned DestReg,
2395
                                       unsigned SrcReg, bool KillSrc,
2396
                                       unsigned Opcode, unsigned ZeroReg,
2397
2
                                       llvm::ArrayRef<unsigned> Indices) const {
2398
2
  const TargetRegisterInfo *TRI = &getRegisterInfo();
2399
2
  unsigned NumRegs = Indices.size();
2400
2
2401
#ifndef NDEBUG
2402
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2403
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2404
  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2405
         "GPR reg sequences should not be able to overlap");
2406
#endif
2407
2408
6
  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2409
4
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2410
4
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2411
4
    MIB.addReg(ZeroReg);
2412
4
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2413
4
    MIB.addImm(0);
2414
4
  }
2415
2
}
2416
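// E.g. (editor's sketch) copying X0_X1 from X2_X3 with Opcode ORRXrs and
// ZeroReg XZR emits:
//   orr x0, xzr, x2, lsl #0
//   orr x1, xzr, x3, lsl #0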
2417
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2418
                                   MachineBasicBlock::iterator I,
2419
                                   const DebugLoc &DL, unsigned DestReg,
2420
1.95M
                                   unsigned SrcReg, bool KillSrc) const {
2421
1.95M
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2422
1.95M
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2423
231k
    const TargetRegisterInfo *TRI = &getRegisterInfo();
2424
231k
2425
231k
    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2426
0
      // If either operand is WSP, expand to ADD #0.
2427
0
      if (Subtarget.hasZeroCycleRegMove()) {
2428
0
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2429
0
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2430
0
                                                     &AArch64::GPR64spRegClass);
2431
0
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2432
0
                                                    &AArch64::GPR64spRegClass);
2433
0
        // This instruction is reading and writing X registers.  This may upset
2434
0
        // the register scavenger and machine verifier, so we need to indicate
2435
0
        // that we are reading an undefined value from SrcRegX, but a proper
2436
0
        // value from SrcReg.
2437
0
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2438
0
            .addReg(SrcRegX, RegState::Undef)
2439
0
            .addImm(0)
2440
0
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2441
0
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2442
0
      } else {
2443
0
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2444
0
            .addReg(SrcReg, getKillRegState(KillSrc))
2445
0
            .addImm(0)
2446
0
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2447
0
      }
2448
231k
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2449
31.2k
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2450
31.2k
          .addImm(0)
2451
31.2k
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2452
200k
    } else {
2453
200k
      if (Subtarget.hasZeroCycleRegMove()) {
2454
199k
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2455
199k
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2456
199k
                                                     &AArch64::GPR64spRegClass);
2457
199k
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2458
199k
                                                    &AArch64::GPR64spRegClass);
2459
199k
        // This instruction is reading and writing X registers.  This may upset
2460
199k
        // the register scavenger and machine verifier, so we need to indicate
2461
199k
        // that we are reading an undefined value from SrcRegX, but a proper
2462
199k
        // value from SrcReg.
2463
199k
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2464
199k
            .addReg(AArch64::XZR)
2465
199k
            .addReg(SrcRegX, RegState::Undef)
2466
199k
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2467
199k
      } else {
2468
959
        // Otherwise, expand to ORR WZR.
2469
959
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2470
959
            .addReg(AArch64::WZR)
2471
959
            .addReg(SrcReg, getKillRegState(KillSrc));
2472
959
      }
2473
200k
    }
2474
231k
    return;
2475
231k
  }
2476
1.72M
2477
1.72M
  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2478
1.72M
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2479
1.64M
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2480
963
      // If either operand is SP, expand to ADD #0.
2481
963
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2482
963
          .addReg(SrcReg, getKillRegState(KillSrc))
2483
963
          .addImm(0)
2484
963
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2485
1.64M
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2486
69.9k
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2487
69.9k
          .addImm(0)
2488
69.9k
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2489
1.57M
    } else {
2490
1.57M
      // Otherwise, expand to ORR XZR.
2491
1.57M
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2492
1.57M
          .addReg(AArch64::XZR)
2493
1.57M
          .addReg(SrcReg, getKillRegState(KillSrc));
2494
1.57M
    }
2495
1.64M
    return;
2496
1.64M
  }
2497
75.4k
2498
75.4k
  // Copy a DDDD register quad by copying the individual sub-registers.
2499
75.4k
  if (AArch64::DDDDRegClass.contains(DestReg) &&
2500
75.4k
      AArch64::DDDDRegClass.contains(SrcReg)) {
2501
0
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2502
0
                                       AArch64::dsub2, AArch64::dsub3};
2503
0
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2504
0
                     Indices);
2505
0
    return;
2506
0
  }
2507
75.4k
2508
75.4k
  // Copy a DDD register triple by copying the individual sub-registers.
2509
75.4k
  if (AArch64::DDDRegClass.contains(DestReg) &&
2510
75.4k
      AArch64::DDDRegClass.contains(SrcReg)) {
2511
1
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2512
1
                                       AArch64::dsub2};
2513
1
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2514
1
                     Indices);
2515
1
    return;
2516
1
  }
2517
75.4k
2518
75.4k
  // Copy a DD register pair by copying the individual sub-registers.
2519
75.4k
  if (AArch64::DDRegClass.contains(DestReg) &&
2520
75.4k
      AArch64::DDRegClass.contains(SrcReg)) {
2521
4
    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2522
4
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2523
4
                     Indices);
2524
4
    return;
2525
4
  }
2526
75.4k
2527
75.4k
  // Copy a QQQQ register quad by copying the individual sub-registers.
2528
75.4k
  if (AArch64::QQQQRegClass.contains(DestReg) &&
2529
75.4k
      AArch64::QQQQRegClass.contains(SrcReg)) {
2530
2
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2531
2
                                       AArch64::qsub2, AArch64::qsub3};
2532
2
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2533
2
                     Indices);
2534
2
    return;
2535
2
  }
2536
75.4k
2537
75.4k
  // Copy a QQQ register triple by copying the individual sub-registers.
2538
75.4k
  if (AArch64::QQQRegClass.contains(DestReg) &&
2539
75.4k
      AArch64::QQQRegClass.contains(SrcReg)) {
2540
2
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2541
2
                                       AArch64::qsub2};
2542
2
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2543
2
                     Indices);
2544
2
    return;
2545
2
  }
2546
75.4k
2547
75.4k
  // Copy a QQ register pair by copying the individual sub-registers.
2548
75.4k
  if (AArch64::QQRegClass.contains(DestReg) &&
2549
75.4k
      AArch64::QQRegClass.contains(SrcReg)) {
2550
2
    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2551
2
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2552
2
                     Indices);
2553
2
    return;
2554
2
  }
2555
75.4k
2556
75.4k
  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2557
75.4k
      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2558
1
    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2559
1
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2560
1
                    AArch64::XZR, Indices);
2561
1
    return;
2562
1
  }
2563
75.4k
2564
75.4k
  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2565
75.4k
      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2566
1
    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2567
1
    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2568
1
                    AArch64::WZR, Indices);
2569
1
    return;
2570
1
  }
2571
75.4k
2572
75.4k
  if (AArch64::FPR128RegClass.contains(DestReg) &&
2573
75.4k
      AArch64::FPR128RegClass.contains(SrcReg)) {
2574
4.28k
    if (Subtarget.hasNEON()) {
2575
4.28k
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2576
4.28k
          .addReg(SrcReg)
2577
4.28k
          .addReg(SrcReg, getKillRegState(KillSrc));
2578
4.28k
    } else {
2579
1
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2580
1
          .addReg(AArch64::SP, RegState::Define)
2581
1
          .addReg(SrcReg, getKillRegState(KillSrc))
2582
1
          .addReg(AArch64::SP)
2583
1
          .addImm(-16);
2584
1
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2585
1
          .addReg(AArch64::SP, RegState::Define)
2586
1
          .addReg(DestReg, RegState::Define)
2587
1
          .addReg(AArch64::SP)
2588
1
          .addImm(16);
2589
1
    }
2590
4.28k
    return;
2591
4.28k
  }
2592
71.1k
2593
71.1k
  if (AArch64::FPR64RegClass.contains(DestReg) &&
2594
71.1k
      AArch64::FPR64RegClass.contains(SrcReg)) {
2595
16.5k
    if (Subtarget.hasNEON()) {
2596
16.5k
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2597
16.5k
                                       &AArch64::FPR128RegClass);
2598
16.5k
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2599
16.5k
                                      &AArch64::FPR128RegClass);
2600
16.5k
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2601
16.5k
          .addReg(SrcReg)
2602
16.5k
          .addReg(SrcReg, getKillRegState(KillSrc));
2603
16.5k
    } else {
2604
3
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2605
3
          .addReg(SrcReg, getKillRegState(KillSrc));
2606
3
    }
2607
16.5k
    return;
2608
16.5k
  }
2609
54.6k
2610
54.6k
  if (AArch64::FPR32RegClass.contains(DestReg) &&
2611
54.6k
      AArch64::FPR32RegClass.contains(SrcReg)) {
2612
2.01k
    if (Subtarget.hasNEON()) {
2613
2.00k
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2614
2.00k
                                       &AArch64::FPR128RegClass);
2615
2.00k
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2616
2.00k
                                      &AArch64::FPR128RegClass);
2617
2.00k
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2618
2.00k
          .addReg(SrcReg)
2619
2.00k
          .addReg(SrcReg, getKillRegState(KillSrc));
2620
2.00k
    } else {
2621
1
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2622
1
          .addReg(SrcReg, getKillRegState(KillSrc));
2623
1
    }
2624
2.01k
    return;
2625
2.01k
  }
2626
52.6k
2627
52.6k
  if (AArch64::FPR16RegClass.contains(DestReg) &&
2628
52.6k
      AArch64::FPR16RegClass.contains(SrcReg)) {
2629
50
    if (Subtarget.hasNEON()) {
2630
50
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2631
50
                                       &AArch64::FPR128RegClass);
2632
50
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2633
50
                                      &AArch64::FPR128RegClass);
2634
50
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2635
50
          .addReg(SrcReg)
2636
50
          .addReg(SrcReg, getKillRegState(KillSrc));
2637
50
    } else {
2638
0
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2639
0
                                       &AArch64::FPR32RegClass);
2640
0
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2641
0
                                      &AArch64::FPR32RegClass);
2642
0
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2643
0
          .addReg(SrcReg, getKillRegState(KillSrc));
2644
0
    }
2645
50
    return;
2646
50
  }
2647
52.5k
2648
52.5k
  if (AArch64::FPR8RegClass.contains(DestReg) &&
2649
52.5k
      AArch64::FPR8RegClass.contains(SrcReg)) {
2650
0
    if (Subtarget.hasNEON()) {
2651
0
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2652
0
                                       &AArch64::FPR128RegClass);
2653
0
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2654
0
                                      &AArch64::FPR128RegClass);
2655
0
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2656
0
          .addReg(SrcReg)
2657
0
          .addReg(SrcReg, getKillRegState(KillSrc));
2658
0
    } else {
2659
0
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2660
0
                                       &AArch64::FPR32RegClass);
2661
0
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2662
0
                                      &AArch64::FPR32RegClass);
2663
0
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2664
0
          .addReg(SrcReg, getKillRegState(KillSrc));
2665
0
    }
2666
0
    return;
2667
0
  }
2668
52.5k
2669
52.5k
  // Copies between GPR64 and FPR64.
2670
52.5k
  if (AArch64::FPR64RegClass.contains(DestReg) &&
2671
52.5k
      AArch64::GPR64RegClass.contains(SrcReg)) {
2672
12.0k
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2673
12.0k
        .addReg(SrcReg, getKillRegState(KillSrc));
2674
12.0k
    return;
2675
12.0k
  }
2676
40.5k
  if (AArch64::GPR64RegClass.contains(DestReg) &&
2677
40.5k
      AArch64::FPR64RegClass.contains(SrcReg)) {
2678
3.51k
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2679
3.51k
        .addReg(SrcReg, getKillRegState(KillSrc));
2680
3.51k
    return;
2681
3.51k
  }
2682
37.0k
  // Copies between GPR32 and FPR32.
2683
37.0k
  if (AArch64::FPR32RegClass.contains(DestReg) &&
2684
37.0k
      AArch64::GPR32RegClass.contains(SrcReg)) {
2685
34.7k
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2686
34.7k
        .addReg(SrcReg, getKillRegState(KillSrc));
2687
34.7k
    return;
2688
34.7k
  }
2689
2.33k
  if (AArch64::GPR32RegClass.contains(DestReg) &&
2690
2.33k
      AArch64::FPR32RegClass.contains(SrcReg)) {
2691
2.33k
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2692
2.33k
        .addReg(SrcReg, getKillRegState(KillSrc));
2693
2.33k
    return;
2694
2.33k
  }
2695
2
2696
2
  if (DestReg == AArch64::NZCV) {
2697
1
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2698
1
    BuildMI(MBB, I, DL, get(AArch64::MSR))
2699
1
        .addImm(AArch64SysReg::NZCV)
2700
1
        .addReg(SrcReg, getKillRegState(KillSrc))
2701
1
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2702
1
    return;
2703
1
  }
2704
1
2705
1
  if (SrcReg == AArch64::NZCV) {
2706
1
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2707
1
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2708
1
        .addImm(AArch64SysReg::NZCV)
2709
1
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2710
1
    return;
2711
1
  }
2712
0
2713
0
  llvm_unreachable("unimplemented reg-to-reg copy");
2714
0
}
2715
2716
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsertBefore,
                                    const MCInstrDesc &MCID,
                                    unsigned SrcReg, bool IsKill,
                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
                                    MachineMemOperand *MMO) {
  unsigned SrcReg0 = SrcReg;
  unsigned SrcReg1 = SrcReg;
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    SubIdx0 = 0;
    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    SubIdx1 = 0;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPWi), SrcReg, isKill,
                              AArch64::sube32, AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
                              get(AArch64::STPXi), SrcReg, isKill,
                              AArch64::sube64, AArch64::subo64, FI, MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

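// Illustrative examples (hypothetical MIR, operand names invented): an
// 8-byte GPR spill takes the scaled-immediate store form, while a QQ pair
// (two 128-bit Q registers, spill size 32) must use a structured store,
// which is why Offset is cleared for the ST1 cases above:
//
//   STRXui %0, %stack.0, 0        ; Offset == true, trailing imm emitted
//   ST1Twov2d %0, %stack.0        ; Offset == false, no immediate operand
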
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertBefore,
                                     const MCInstrDesc &MCID,
                                     unsigned DestReg, unsigned SubIdx0,
                                     unsigned SubIdx1, int FI,
                                     MachineMemOperand *MMO) {
  unsigned DestReg0 = DestReg;
  unsigned DestReg1 = DestReg;
  bool IsUndef = true;
  if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    SubIdx0 = 0;
    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    SubIdx1 = 0;
    IsUndef = false;
  }
  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
}

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (TRI->getSpillSize(*RC)) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRDui;
    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
                               AArch64::subo32, FI, MMO);
      return;
    }
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
                               AArch64::subo64, FI, MMO);
      return;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

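// The reload path deliberately mirrors storeRegToStackSlot() above: the
// same spill-size switch selects the matching LDR*ui or LD1* opcode, and
// Offset is cleared for the LD1 forms because the structured loads have no
// immediate-offset variant (isAArch64FrameOffsetLegal() below bails out on
// them for the same reason).
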
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV,
                           bool NeedsWinCFI, bool *HasWinCFI) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register.  If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI).  That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
      if (HasWinCFI)
        *HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(ThisVal)
          .setMIFlag(Flag);
    }

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);

  if (NeedsWinCFI) {
    if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
        (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
      if (HasWinCFI)
        *HasWinCFI = true;
      if (Offset == 0)
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
                setMIFlag(Flag);
      else
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
                addImm(Offset).setMIFlag(Flag);
    } else if (DestReg == AArch64::SP) {
      if (HasWinCFI)
        *HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
              addImm(Offset).setMIFlag(Flag);
    }
  }
}

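// Worked example (illustrative): an SP adjustment of 0x12340 bytes does not
// fit a single 12-bit immediate, so the loop peels off the shifted chunk and
// the tail emits the remainder:
//
//   add sp, sp, #0x12, lsl #12    ; 0x12000 bytes
//   add sp, sp, #0x340            ; remaining 0x340 bytes
//
// Each iteration can retire at most 0xfff << 12 bytes, so arbitrarily large
// offsets just emit a longer chain of adds rather than failing.
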
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS, VirtRegMap *VRM) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %0 = COPY %sp; GPR64all:%0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isFullCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Handle the case where a copy is being spilled or filled but the source
  // and destination register class don't match.  For example:
  //
  //   %0 = COPY %xzr; GPR64common:%0
  //
  // In this case we can still safely fold away the COPY and generate the
  // following spill code:
  //
  //   STRXui %xzr, %stack.0
  //
  // This also eliminates spilled cross register class COPYs (e.g. between x and
  // d regs) of the same size.  For example:
  //
  //   %0 = COPY %1; GPR64:%0, FPR64:%1
  //
  // will be filled as
  //
  //   LDRDui %0, fi<#0>
  //
  // instead of
  //
  //   LDRXui %Temp, fi<#0>
  //   %0 = FMOV %Temp
  //
  if (MI.isCopy() && Ops.size() == 1 &&
      // Make sure we're only folding the explicit COPY defs/uses.
      (Ops[0] == 0 || Ops[0] == 1)) {
    bool IsSpill = Ops[0] == 0;
    bool IsFill = !IsSpill;
    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    MachineBasicBlock &MBB = *MI.getParent();
    const MachineOperand &DstMO = MI.getOperand(0);
    const MachineOperand &SrcMO = MI.getOperand(1);
    unsigned DstReg = DstMO.getReg();
    unsigned SrcReg = SrcMO.getReg();
    // This is slightly expensive to compute for physical regs since
    // getMinimalPhysRegClass is slow.
    auto getRegClass = [&](unsigned Reg) {
      return TargetRegisterInfo::isVirtualRegister(Reg)
                 ? MRI.getRegClass(Reg)
                 : TRI.getMinimalPhysRegClass(Reg);
    };

    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
             "Mismatched register size in non subreg COPY");
      if (IsSpill)
        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
                            getRegClass(SrcReg), &TRI);
      else
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
                             getRegClass(DstReg), &TRI);
      return &*--InsertPt;
    }

    // Handle cases like spilling def of:
    //
    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    //
    // where the physical register source can be widened and stored to the full
    // virtual reg destination stack slot, in this case producing:
    //
    //   STRXui %xzr, %stack.0
    //
    if (IsSpill && DstMO.isUndef() &&
        TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
      assert(SrcMO.getSubReg() == 0 &&
             "Unexpected subreg on physical register");
      const TargetRegisterClass *SpillRC;
      unsigned SpillSubreg;
      switch (DstMO.getSubReg()) {
      default:
        SpillRC = nullptr;
        break;
      case AArch64::sub_32:
      case AArch64::ssub:
        if (AArch64::GPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::GPR64RegClass;
          SpillSubreg = AArch64::sub_32;
        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR64RegClass;
          SpillSubreg = AArch64::ssub;
        } else
          SpillRC = nullptr;
        break;
      case AArch64::dsub:
        if (AArch64::FPR64RegClass.contains(SrcReg)) {
          SpillRC = &AArch64::FPR128RegClass;
          SpillSubreg = AArch64::dsub;
        } else
          SpillRC = nullptr;
        break;
      }

      if (SpillRC)
        if (unsigned WidenedSrcReg =
                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
                              FrameIndex, SpillRC, &TRI);
          return &*--InsertPt;
        }
    }

    // Handle cases like filling use of:
    //
    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    //
    // where we can load the full virtual reg source stack slot, into the subreg
    // destination, in this case producing:
    //
    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    //
    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
      const TargetRegisterClass *FillRC;
      switch (DstMO.getSubReg()) {
      default:
        FillRC = nullptr;
        break;
      case AArch64::sub_32:
        FillRC = &AArch64::GPR32RegClass;
        break;
      case AArch64::ssub:
        FillRC = &AArch64::FPR32RegClass;
        break;
      case AArch64::dsub:
        FillRC = &AArch64::FPR64RegClass;
        break;
      }

      if (FillRC) {
        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
                   TRI.getRegSizeInBits(*FillRC) &&
               "Mismatched regclass size on folded subreg COPY");
        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
        MachineInstr &LoadMI = *--InsertPt;
        MachineOperand &LoadDst = LoadMI.getOperand(0);
        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
        LoadDst.setSubReg(DstMO.getSubReg());
        LoadDst.setIsUndef();
        return &LoadMI;
      }
    }
  }

  // Cannot fold.
  return nullptr;
}

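// Illustrative before/after for the cross-class case described above
// (hypothetical virtual registers): rather than filling into a GPR and then
// copying across banks,
//
//   %1:gpr64 = LDRXui %stack.0, 0
//   %0:fpr64 = COPY %1
//
// the COPY is folded so the fill is emitted directly in the destination
// register class:
//
//   %0:fpr64 = LDRDui %stack.0, 0
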
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;

  // Exit early for structured vector spills/fills as they can't take an
  // immediate offset.
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::IRG:
  case AArch64::IRGstack:
    return AArch64FrameOffsetCannotUpdate;
  }

  // Get the min/max offset and the scale.
  unsigned Scale, Width;
  int64_t MinOff, MaxOff;
  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
                                      MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  // Construct the complete offset.
  const MachineOperand &ImmOpnd =
      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
  Offset += ImmOpnd.getImm() * Scale;

  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset and there is an unscaled op to use.
  Optional<unsigned> UnscaledOp =
      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
  if (useUnscaledOp &&
      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");

  int64_t Remainder = Offset % Scale;
  assert(!(Remainder && useUnscaledOp) &&
         "Cannot have remainder when using unscaled op");

  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
  int NewOffset = Offset / Scale;
  if (MinOff <= NewOffset && NewOffset <= MaxOff)
    Offset = Remainder;
  else {
    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
    Offset = Offset - NewOffset * Scale + Remainder;
  }

  if (EmittableOffset)
    *EmittableOffset = NewOffset;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp && UnscaledOp)
    *OutUnscaledOp = *UnscaledOp;

  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

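// Worked example (illustrative): LDRXui has Scale == 8 and a scaled
// immediate range of [0, 4095]. A byte offset of 24 gives NewOffset == 3
// with Remainder == 0, so the access is legal as-is. An offset of 20 is
// not a multiple of the scale, so useUnscaledOp switches to LDURXi. An
// offset of 40000 scales to 5000 > 4095, so NewOffset is clamped to MaxOff
// and the leftover bytes are handed back in Offset for the caller to
// materialize separately.
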
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

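// Usage sketch (illustrative): this is the entry point used during frame
// index elimination once the final stack layout is known. An ADDXri/ADDSXri
// computing a frame address is replaced outright via emitFrameOffset(); for
// loads and stores, the immediate is rewritten in place (possibly switching
// to the unscaled opcode), and a false return with a nonzero Offset tells
// the caller that a residual offset still has to be added into a scratch
// register.
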
void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

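// Note: HINT #0 is the architectural encoding of NOP on AArch64, so this
// emits the canonical single no-op instruction.
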
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }

// True when Opc sets flags
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  default:
    break;
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    return (Options.UnsafeFPMath ||
            Options.AllowFPOpFusion == FPOpFusion::Fast);
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live, bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    // When the opcode can't change, bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
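// Illustrative example (hypothetical MIR): a plain multiply is represented
// here as MADD with the zero register as addend, which is why canCombine()
// checks operand 3 against WZR/XZR. Given that %2 has a single use:
//
//   %2 = MADDWrrr %0, %1, $wzr    ; mul w2, w0, w1
//   %3 = ADDWrr %2, %4            ; add w3, w2, w4
// ==>
//   %3 = MADDWrrr %0, %1, %4      ; madd w3, w0, w1, w4
//
// The _OP1/_OP2 suffixes record which ADD/SUB operand is the multiply; the
// distinction matters for subtraction, since MSUB computes Wi - Wn*Wm
// rather than Wn*Wm - Wi.
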
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
    MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
3911
/// genFusedMultiply - Generate fused multiply instructions.
3912
/// This function supports both integer and floating point instructions.
3913
/// A typical example:
3914
///  F|MUL I=A,B,0
3915
///  F|ADD R,I,C
3916
///  ==> F|MADD R,A,B,C
3917
/// \param MF Containing MachineFunction
3918
/// \param MRI Register information
3919
/// \param TII Target information
3920
/// \param Root is the F|ADD instruction
3921
/// \param [out] InsInstrs is a vector of machine instructions and will
3922
/// contain the generated madd instruction
3923
/// \param IdxMulOpd is index of operand in Root that is the result of
3924
/// the F|MUL. In the example above IdxMulOpd is 1.
3925
/// \param MaddOpc the opcode fo the f|madd instruction
3926
/// \param RC Register class of operands
3927
/// \param kind of fma instruction (addressing mode) to be generated
3928
/// \param ReplacedAddend is the result register from the instruction
3929
/// replacing the non-combined operand, if any.
3930
static MachineInstr *
3931
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3932
                 const TargetInstrInfo *TII, MachineInstr &Root,
3933
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3934
                 unsigned MaddOpc, const TargetRegisterClass *RC,
3935
                 FMAInstKind kind = FMAInstKind::Default,
3936
83.2k
                 const unsigned *ReplacedAddend = nullptr) {
3937
83.2k
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3938
83.2k
3939
83.2k
  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 
27.55k
:
175.7k
;
3940
83.2k
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3941
83.2k
  unsigned ResultReg = Root.getOperand(0).getReg();
3942
83.2k
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
3943
83.2k
  bool Src0IsKill = MUL->getOperand(1).isKill();
3944
83.2k
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
3945
83.2k
  bool Src1IsKill = MUL->getOperand(2).isKill();
3946
83.2k
3947
83.2k
  unsigned SrcReg2;
3948
83.2k
  bool Src2IsKill;
3949
83.2k
  if (ReplacedAddend) {
3950
36
    // If we just generated a new addend, we must be it's only use.
3951
36
    SrcReg2 = *ReplacedAddend;
3952
36
    Src2IsKill = true;
3953
83.2k
  } else {
3954
83.2k
    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3955
83.2k
    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3956
83.2k
  }
3957
83.2k
3958
83.2k
  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3959
83.2k
    MRI.constrainRegClass(ResultReg, RC);
3960
83.2k
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3961
83.2k
    MRI.constrainRegClass(SrcReg0, RC);
3962
83.2k
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3963
83.2k
    MRI.constrainRegClass(SrcReg1, RC);
3964
83.2k
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3965
83.2k
    MRI.constrainRegClass(SrcReg2, RC);
3966
83.2k
3967
83.2k
  MachineInstrBuilder MIB;
3968
83.2k
  if (kind == FMAInstKind::Default)
3969
83.1k
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3970
83.1k
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
3971
83.1k
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
3972
83.1k
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
3973
90
  else if (kind == FMAInstKind::Indexed)
3974
15
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3975
15
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
3976
15
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
3977
15
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
3978
15
              .addImm(MUL->getOperand(3).getImm());
3979
75
  else if (kind == FMAInstKind::Accumulator)
3980
75
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3981
75
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
3982
75
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
3983
75
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
3984
75
  else
3985
75
    assert(false && "Invalid FMA instruction kind \n");
3986
83.2k
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3987
83.2k
  InsInstrs.push_back(MIB);
3988
83.2k
  return MUL;
3989
83.2k
}
3990
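// An illustrative before/after of the genFusedMultiply rewrite above, written
// out as AArch64 assembly (register choices are assumed, not from the file):
//
//   mul  w8, w0, w1          // I = A * B
//   add  w0, w8, w2          // R = I + C
//   ==>
//   madd w0, w0, w1, w2      // R = A * B + C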
3991
/// genMaddR - Generate madd instruction and combine mul and add using
3992
/// an extra virtual register
3993
/// Example - an ADD intermediate needs to be stored in a register:
3994
///   MUL I=A,B,0
3995
///   ADD R,I,Imm
3996
///   ==> ORR  V, ZR, Imm
3997
///   ==> MADD R,A,B,V
3998
/// \param MF Containing MachineFunction
3999
/// \param MRI Register information
4000
/// \param TII Target information
4001
/// \param Root is the ADD instruction
4002
/// \param [out] InsInstrs is a vector of machine instructions and will
4003
/// contain the generated madd instruction
4004
/// \param IdxMulOpd is index of operand in Root that is the result of
4005
/// the MUL. In the example above IdxMulOpd is 1.
4006
/// \param MaddOpc the opcode of the madd instruction
4007
/// \param VR is a virtual register that holds the value of an ADD operand
4008
/// (V in the example above).
4009
/// \param RC Register class of operands
4010
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4011
                              const TargetInstrInfo *TII, MachineInstr &Root,
4012
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
4013
                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4014
423
                              const TargetRegisterClass *RC) {
4015
423
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4016
423
4017
423
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4018
423
  unsigned ResultReg = Root.getOperand(0).getReg();
4019
423
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
4020
423
  bool Src0IsKill = MUL->getOperand(1).isKill();
4021
423
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
4022
423
  bool Src1IsKill = MUL->getOperand(2).isKill();
4023
423
4024
423
  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4025
423
    MRI.constrainRegClass(ResultReg, RC);
4026
423
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4027
423
    MRI.constrainRegClass(SrcReg0, RC);
4028
423
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4029
423
    MRI.constrainRegClass(SrcReg1, RC);
4030
423
  if (TargetRegisterInfo::isVirtualRegister(VR))
4031
423
    MRI.constrainRegClass(VR, RC);
4032
423
4033
423
  MachineInstrBuilder MIB =
4034
423
      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4035
423
          .addReg(SrcReg0, getKillRegState(Src0IsKill))
4036
423
          .addReg(SrcReg1, getKillRegState(Src1IsKill))
4037
423
          .addReg(VR);
4038
423
  // Insert the MADD
4039
423
  InsInstrs.push_back(MIB);
4040
423
  return MUL;
4041
423
}
4042
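// An illustrative sketch of the genMaddR rewrite, assuming the addend is an
// immediate that is encodable as a logical immediate (registers assumed):
//
//   mul  w8, w0, w1          // I = A * B
//   add  w0, w8, #0xff       // R = I + Imm
//   ==>
//   orr  w9, wzr, #0xff      // V = Imm, materialized via ORR
//   madd w0, w0, w1, w9      // R = A * B + V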
4043
/// When getMachineCombinerPatterns() finds potential patterns,
4044
/// this function generates the instructions that could replace the
4045
/// original code sequence
4046
void AArch64InstrInfo::genAlternativeCodeSequence(
4047
    MachineInstr &Root, MachineCombinerPattern Pattern,
4048
    SmallVectorImpl<MachineInstr *> &InsInstrs,
4049
    SmallVectorImpl<MachineInstr *> &DelInstrs,
4050
84.0k
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4051
84.0k
  MachineBasicBlock &MBB = *Root.getParent();
4052
84.0k
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4053
84.0k
  MachineFunction &MF = *MBB.getParent();
4054
84.0k
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4055
84.0k
4056
84.0k
  MachineInstr *MUL;
4057
84.0k
  const TargetRegisterClass *RC;
4058
84.0k
  unsigned Opc;
4059
84.0k
  switch (Pattern) {
4060
84.0k
  default:
4061
242
    // Reassociate instructions.
4062
242
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4063
242
                                                DelInstrs, InstrIdxForVirtReg);
4064
242
    return;
4065
84.0k
  case MachineCombinerPattern::MULADDW_OP1:
4066
7.47k
  case MachineCombinerPattern::MULADDX_OP1:
4067
7.47k
    // MUL I=A,B,0
4068
7.47k
    // ADD R,I,C
4069
7.47k
    // ==> MADD R,A,B,C
4070
7.47k
    // --- Create(MADD);
4071
7.47k
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4072
5.18k
      Opc = AArch64::MADDWrrr;
4073
5.18k
      RC = &AArch64::GPR32RegClass;
4074
5.18k
    } else {
4075
2.29k
      Opc = AArch64::MADDXrrr;
4076
2.29k
      RC = &AArch64::GPR64RegClass;
4077
2.29k
    }
4078
7.47k
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4079
7.47k
    break;
4080
73.7k
  case MachineCombinerPattern::MULADDW_OP2:
4081
73.7k
  case MachineCombinerPattern::MULADDX_OP2:
4082
73.7k
    // MUL I=A,B,0
4083
73.7k
    // ADD R,C,I
4084
73.7k
    // ==> MADD R,A,B,C
4085
73.7k
    // --- Create(MADD);
4086
73.7k
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4087
4.62k
      Opc = AArch64::MADDWrrr;
4088
4.62k
      RC = &AArch64::GPR32RegClass;
4089
69.0k
    } else {
4090
69.0k
      Opc = AArch64::MADDXrrr;
4091
69.0k
      RC = &AArch64::GPR64RegClass;
4092
69.0k
    }
4093
73.7k
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4094
73.7k
    break;
4095
73.7k
  case MachineCombinerPattern::MULADDWI_OP1:
4096
445
  case MachineCombinerPattern::MULADDXI_OP1: {
4097
445
    // MUL I=A,B,0
4098
445
    // ADD R,I,Imm
4099
445
    // ==> ORR  V, ZR, Imm
4100
445
    // ==> MADD R,A,B,V
4101
445
    // --- Create(MADD);
4102
445
    const TargetRegisterClass *OrrRC;
4103
445
    unsigned BitSize, OrrOpc, ZeroReg;
4104
445
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4105
286
      OrrOpc = AArch64::ORRWri;
4106
286
      OrrRC = &AArch64::GPR32spRegClass;
4107
286
      BitSize = 32;
4108
286
      ZeroReg = AArch64::WZR;
4109
286
      Opc = AArch64::MADDWrrr;
4110
286
      RC = &AArch64::GPR32RegClass;
4111
286
    } else {
4112
159
      OrrOpc = AArch64::ORRXri;
4113
159
      OrrRC = &AArch64::GPR64spRegClass;
4114
159
      BitSize = 64;
4115
159
      ZeroReg = AArch64::XZR;
4116
159
      Opc = AArch64::MADDXrrr;
4117
159
      RC = &AArch64::GPR64RegClass;
4118
159
    }
4119
445
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4120
445
    uint64_t Imm = Root.getOperand(2).getImm();
4121
445
4122
445
    if (Root.getOperand(3).isImm()) {
4123
445
      unsigned Val = Root.getOperand(3).getImm();
4124
445
      Imm = Imm << Val;
4125
445
    }
4126
445
    uint64_t UImm = SignExtend64(Imm, BitSize);
4127
445
    uint64_t Encoding;
4128
445
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4129
391
      MachineInstrBuilder MIB1 =
4130
391
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4131
391
              .addReg(ZeroReg)
4132
391
              .addImm(Encoding);
4133
391
      InsInstrs.push_back(MIB1);
4134
391
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4135
391
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4136
391
    }
4137
445
    break;
4138
445
  }
4139
445
  case MachineCombinerPattern::MULSUBW_OP1:
4140
31
  case MachineCombinerPattern::MULSUBX_OP1: {
4141
31
    // MUL I=A,B,0
4142
31
    // SUB R,I, C
4143
31
    // ==> SUB  V, 0, C
4144
31
    // ==> MADD R,A,B,V // = -C + A*B
4145
31
    // --- Create(MADD);
4146
31
    const TargetRegisterClass *SubRC;
4147
31
    unsigned SubOpc, ZeroReg;
4148
31
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4149
23
      SubOpc = AArch64::SUBWrr;
4150
23
      SubRC = &AArch64::GPR32spRegClass;
4151
23
      ZeroReg = AArch64::WZR;
4152
23
      Opc = AArch64::MADDWrrr;
4153
23
      RC = &AArch64::GPR32RegClass;
4154
23
    } else {
4155
8
      SubOpc = AArch64::SUBXrr;
4156
8
      SubRC = &AArch64::GPR64spRegClass;
4157
8
      ZeroReg = AArch64::XZR;
4158
8
      Opc = AArch64::MADDXrrr;
4159
8
      RC = &AArch64::GPR64RegClass;
4160
8
    }
4161
31
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
4162
31
    // SUB NewVR, 0, C
4163
31
    MachineInstrBuilder MIB1 =
4164
31
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4165
31
            .addReg(ZeroReg)
4166
31
            .add(Root.getOperand(2));
4167
31
    InsInstrs.push_back(MIB1);
4168
31
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4169
31
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4170
31
    break;
4171
31
  }
4172
1.96k
  case MachineCombinerPattern::MULSUBW_OP2:
4173
1.96k
  case MachineCombinerPattern::MULSUBX_OP2:
4174
1.96k
    // MUL I=A,B,0
4175
1.96k
    // SUB R,C,I
4176
1.96k
    // ==> MSUB R,A,B,C (computes C - A*B)
4177
1.96k
    // --- Create(MSUB);
4178
1.96k
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4179
1.79k
      Opc = AArch64::MSUBWrrr;
4180
1.79k
      RC = &AArch64::GPR32RegClass;
4181
1.79k
    } else {
4182
172
      Opc = AArch64::MSUBXrrr;
4183
172
      RC = &AArch64::GPR64RegClass;
4184
172
    }
4185
1.96k
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4186
1.96k
    break;
4187
1.96k
  case MachineCombinerPattern::MULSUBWI_OP1:
4188
19
  case MachineCombinerPattern::MULSUBXI_OP1: {
4189
19
    // MUL I=A,B,0
4190
19
    // SUB R,I, Imm
4191
19
    // ==> ORR  V, ZR, -Imm
4192
19
    // ==> MADD R,A,B,V // = -Imm + A*B
4193
19
    // --- Create(MADD);
4194
19
    const TargetRegisterClass *OrrRC;
4195
19
    unsigned BitSize, OrrOpc, ZeroReg;
4196
19
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4197
17
      OrrOpc = AArch64::ORRWri;
4198
17
      OrrRC = &AArch64::GPR32spRegClass;
4199
17
      BitSize = 32;
4200
17
      ZeroReg = AArch64::WZR;
4201
17
      Opc = AArch64::MADDWrrr;
4202
17
      RC = &AArch64::GPR32RegClass;
4203
17
    } else {
4204
2
      OrrOpc = AArch64::ORRXri;
4205
2
      OrrRC = &AArch64::GPR64spRegClass;
4206
2
      BitSize = 64;
4207
2
      ZeroReg = AArch64::XZR;
4208
2
      Opc = AArch64::MADDXrrr;
4209
2
      RC = &AArch64::GPR64RegClass;
4210
2
    }
4211
19
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4212
19
    uint64_t Imm = Root.getOperand(2).getImm();
4213
19
    if (Root.getOperand(3).isImm()) {
4214
19
      unsigned Val = Root.getOperand(3).getImm();
4215
19
      Imm = Imm << Val;
4216
19
    }
4217
19
    uint64_t UImm = SignExtend64(-Imm, BitSize);
4218
19
    uint64_t Encoding;
4219
19
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4220
1
      MachineInstrBuilder MIB1 =
4221
1
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4222
1
              .addReg(ZeroReg)
4223
1
              .addImm(Encoding);
4224
1
      InsInstrs.push_back(MIB1);
4225
1
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4226
1
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4227
1
    }
4228
19
    break;
4229
19
  }
4230
19
  // Floating Point Support
4231
29
  case MachineCombinerPattern::FMULADDS_OP1:
4232
29
  case MachineCombinerPattern::FMULADDD_OP1:
4233
29
    // MUL I=A,B,0
4234
29
    // ADD R,I,C
4235
29
    // ==> MADD R,A,B,C
4236
29
    // --- Create(MADD);
4237
29
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4238
27
      Opc = AArch64::FMADDSrrr;
4239
27
      RC = &AArch64::FPR32RegClass;
4240
27
    } else {
4241
2
      Opc = AArch64::FMADDDrrr;
4242
2
      RC = &AArch64::FPR64RegClass;
4243
2
    }
4244
29
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4245
29
    break;
4246
29
  case MachineCombinerPattern::FMULADDS_OP2:
4247
5
  case MachineCombinerPattern::FMULADDD_OP2:
4248
5
    // FMUL I=A,B,0
4249
5
    // FADD R,C,I
4250
5
    // ==> FMADD R,A,B,C
4251
5
    // --- Create(FMADD);
4252
5
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4253
3
      Opc = AArch64::FMADDSrrr;
4254
3
      RC = &AArch64::FPR32RegClass;
4255
3
    } else {
4256
2
      Opc = AArch64::FMADDDrrr;
4257
2
      RC = &AArch64::FPR64RegClass;
4258
2
    }
4259
5
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4260
5
    break;
4261
5
4262
5
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4263
1
    Opc = AArch64::FMLAv1i32_indexed;
4264
1
    RC = &AArch64::FPR32RegClass;
4265
1
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4266
1
                           FMAInstKind::Indexed);
4267
1
    break;
4268
5
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4269
0
    Opc = AArch64::FMLAv1i32_indexed;
4270
0
    RC = &AArch64::FPR32RegClass;
4271
0
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4272
0
                           FMAInstKind::Indexed);
4273
0
    break;
4274
5
4275
5
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4276
1
    Opc = AArch64::FMLAv1i64_indexed;
4277
1
    RC = &AArch64::FPR64RegClass;
4278
1
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4279
1
                           FMAInstKind::Indexed);
4280
1
    break;
4281
5
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4282
0
    Opc = AArch64::FMLAv1i64_indexed;
4283
0
    RC = &AArch64::FPR64RegClass;
4284
0
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4285
0
                           FMAInstKind::Indexed);
4286
0
    break;
4287
5
4288
5
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4289
2
  case MachineCombinerPattern::FMLAv2f32_OP1:
4290
2
    RC = &AArch64::FPR64RegClass;
4291
2
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4292
1
      Opc = AArch64::FMLAv2i32_indexed;
4293
1
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4294
1
                             FMAInstKind::Indexed);
4295
1
    } else {
4296
1
      Opc = AArch64::FMLAv2f32;
4297
1
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4298
1
                             FMAInstKind::Accumulator);
4299
1
    }
4300
2
    break;
4301
2
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4302
0
  case MachineCombinerPattern::FMLAv2f32_OP2:
4303
0
    RC = &AArch64::FPR64RegClass;
4304
0
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4305
0
      Opc = AArch64::FMLAv2i32_indexed;
4306
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4307
0
                             FMAInstKind::Indexed);
4308
0
    } else {
4309
0
      Opc = AArch64::FMLAv2f32;
4310
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4311
0
                             FMAInstKind::Accumulator);
4312
0
    }
4313
0
    break;
4314
0
4315
2
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4316
2
  case MachineCombinerPattern::FMLAv2f64_OP1:
4317
2
    RC = &AArch64::FPR128RegClass;
4318
2
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4319
1
      Opc = AArch64::FMLAv2i64_indexed;
4320
1
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4321
1
                             FMAInstKind::Indexed);
4322
1
    } else {
4323
1
      Opc = AArch64::FMLAv2f64;
4324
1
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4325
1
                             FMAInstKind::Accumulator);
4326
1
    }
4327
2
    break;
4328
2
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4329
0
  case MachineCombinerPattern::FMLAv2f64_OP2:
4330
0
    RC = &AArch64::FPR128RegClass;
4331
0
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4332
0
      Opc = AArch64::FMLAv2i64_indexed;
4333
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4334
0
                             FMAInstKind::Indexed);
4335
0
    } else {
4336
0
      Opc = AArch64::FMLAv2f64;
4337
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4338
0
                             FMAInstKind::Accumulator);
4339
0
    }
4340
0
    break;
4341
0
4342
4
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4343
4
  case MachineCombinerPattern::FMLAv4f32_OP1:
4344
4
    RC = &AArch64::FPR128RegClass;
4345
4
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4346
1
      Opc = AArch64::FMLAv4i32_indexed;
4347
1
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4348
1
                             FMAInstKind::Indexed);
4349
3
    } else {
4350
3
      Opc = AArch64::FMLAv4f32;
4351
3
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4352
3
                             FMAInstKind::Accumulator);
4353
3
    }
4354
4
    break;
4355
4
4356
4
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4357
0
  case MachineCombinerPattern::FMLAv4f32_OP2:
4358
0
    RC = &AArch64::FPR128RegClass;
4359
0
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4360
0
      Opc = AArch64::FMLAv4i32_indexed;
4361
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4362
0
                             FMAInstKind::Indexed);
4363
0
    } else {
4364
0
      Opc = AArch64::FMLAv4f32;
4365
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4366
0
                             FMAInstKind::Accumulator);
4367
0
    }
4368
0
    break;
4369
0
4370
1
  case MachineCombinerPattern::FMULSUBS_OP1:
4371
1
  case MachineCombinerPattern::FMULSUBD_OP1: {
4372
1
    // FMUL I=A,B,0
4373
1
    // FSUB R,I,C
4374
1
    // ==> FNMSUB R,A,B,C // = -C + A*B
4375
1
    // --- Create(FNMSUB);
4376
1
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4377
1
      Opc = AArch64::FNMSUBSrrr;
4378
1
      RC = &AArch64::FPR32RegClass;
4379
1
    } else {
4380
0
      Opc = AArch64::FNMSUBDrrr;
4381
0
      RC = &AArch64::FPR64RegClass;
4382
0
    }
4383
1
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4384
1
    break;
4385
1
  }
4386
1
4387
4
  case MachineCombinerPattern::FNMULSUBS_OP1:
4388
4
  case MachineCombinerPattern::FNMULSUBD_OP1: {
4389
4
    // FNMUL I=A,B,0
4390
4
    // FSUB R,I,C
4391
4
    // ==> FNMADD R,A,B,C // = -A*B - C
4392
4
    // --- Create(FNMADD);
4393
4
    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4394
2
      Opc = AArch64::FNMADDSrrr;
4395
2
      RC = &AArch64::FPR32RegClass;
4396
2
    } else {
4397
2
      Opc = AArch64::FNMADDDrrr;
4398
2
      RC = &AArch64::FPR64RegClass;
4399
2
    }
4400
4
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4401
4
    break;
4402
4
  }
4403
4
4404
5
  case MachineCombinerPattern::FMULSUBS_OP2:
4405
5
  case MachineCombinerPattern::FMULSUBD_OP2: {
4406
5
    // FMUL I=A,B,0
4407
5
    // FSUB R,C,I
4408
5
    // ==> FMSUB R,A,B,C (computes C - A*B)
4409
5
    // --- Create(FMSUB);
4410
5
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4411
5
      Opc = AArch64::FMSUBSrrr;
4412
5
      RC = &AArch64::FPR32RegClass;
4413
5
    } else {
4414
0
      Opc = AArch64::FMSUBDrrr;
4415
0
      RC = &AArch64::FPR64RegClass;
4416
0
    }
4417
5
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4418
5
    break;
4419
5
  }
4420
5
4421
5
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4422
2
    Opc = AArch64::FMLSv1i32_indexed;
4423
2
    RC = &AArch64::FPR32RegClass;
4424
2
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4425
2
                           FMAInstKind::Indexed);
4426
2
    break;
4427
5
4428
5
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4429
2
    Opc = AArch64::FMLSv1i64_indexed;
4430
2
    RC = &AArch64::FPR64RegClass;
4431
2
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4432
2
                           FMAInstKind::Indexed);
4433
2
    break;
4434
5
4435
12
  case MachineCombinerPattern::FMLSv2f32_OP2:
4436
12
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4437
12
    RC = &AArch64::FPR64RegClass;
4438
12
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4439
2
      Opc = AArch64::FMLSv2i32_indexed;
4440
2
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4441
2
                             FMAInstKind::Indexed);
4442
10
    } else {
4443
10
      Opc = AArch64::FMLSv2f32;
4444
10
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4445
10
                             FMAInstKind::Accumulator);
4446
10
    }
4447
12
    break;
4448
12
4449
12
  case MachineCombinerPattern::FMLSv2f64_OP2:
4450
12
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4451
12
    RC = &AArch64::FPR128RegClass;
4452
12
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4453
2
      Opc = AArch64::FMLSv2i64_indexed;
4454
2
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4455
2
                             FMAInstKind::Indexed);
4456
10
    } else {
4457
10
      Opc = AArch64::FMLSv2f64;
4458
10
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4459
10
                             FMAInstKind::Accumulator);
4460
10
    }
4461
12
    break;
4462
12
4463
16
  case MachineCombinerPattern::FMLSv4f32_OP2:
4464
16
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4465
16
    RC = &AArch64::FPR128RegClass;
4466
16
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4467
2
      Opc = AArch64::FMLSv4i32_indexed;
4468
2
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4469
2
                             FMAInstKind::Indexed);
4470
14
    } else {
4471
14
      Opc = AArch64::FMLSv4f32;
4472
14
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4473
14
                             FMAInstKind::Accumulator);
4474
14
    }
4475
16
    break;
4476
16
  case MachineCombinerPattern::FMLSv2f32_OP1:
4477
12
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4478
12
    RC = &AArch64::FPR64RegClass;
4479
12
    unsigned NewVR = MRI.createVirtualRegister(RC);
4480
12
    MachineInstrBuilder MIB1 =
4481
12
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4482
12
            .add(Root.getOperand(2));
4483
12
    InsInstrs.push_back(MIB1);
4484
12
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4485
12
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4486
0
      Opc = AArch64::FMLAv2i32_indexed;
4487
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4488
0
                             FMAInstKind::Indexed, &NewVR);
4489
12
    } else {
4490
12
      Opc = AArch64::FMLAv2f32;
4491
12
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4492
12
                             FMAInstKind::Accumulator, &NewVR);
4493
12
    }
4494
12
    break;
4495
12
  }
4496
12
  case MachineCombinerPattern::FMLSv4f32_OP1:
4497
12
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4498
12
    RC = &AArch64::FPR128RegClass;
4499
12
    unsigned NewVR = MRI.createVirtualRegister(RC);
4500
12
    MachineInstrBuilder MIB1 =
4501
12
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4502
12
            .add(Root.getOperand(2));
4503
12
    InsInstrs.push_back(MIB1);
4504
12
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4505
12
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4506
0
      Opc = AArch64::FMLAv4i32_indexed;
4507
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4508
0
                             FMAInstKind::Indexed, &NewVR);
4509
12
    } else {
4510
12
      Opc = AArch64::FMLAv4f32;
4511
12
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4512
12
                             FMAInstKind::Accumulator, &NewVR);
4513
12
    }
4514
12
    break;
4515
12
  }
4516
12
  case MachineCombinerPattern::FMLSv2f64_OP1:
4517
12
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4518
12
    RC = &AArch64::FPR128RegClass;
4519
12
    unsigned NewVR = MRI.createVirtualRegister(RC);
4520
12
    MachineInstrBuilder MIB1 =
4521
12
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4522
12
            .add(Root.getOperand(2));
4523
12
    InsInstrs.push_back(MIB1);
4524
12
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4525
12
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4526
0
      Opc = AArch64::FMLAv2i64_indexed;
4527
0
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4528
0
                             FMAInstKind::Indexed, &NewVR);
4529
12
    } else {
4530
12
      Opc = AArch64::FMLAv2f64;
4531
12
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4532
12
                             FMAInstKind::Accumulator, &NewVR);
4533
12
    }
4534
12
    break;
4535
83.7k
  }
4536
83.7k
  } // end switch (Pattern)
4537
83.7k
  // Record MUL and ADD/SUB for deletion
4538
83.7k
  DelInstrs.push_back(MUL);
4539
83.7k
  DelInstrs.push_back(&Root);
4540
83.7k
}
4541
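// An illustrative sketch of the MULSUB*_OP1 path handled above (registers
// assumed): the subtrahend is negated into a fresh register so the rewrite
// can still use a multiply-add.
//
//   mul  w8, w0, w1          // I = A * B
//   sub  w0, w8, w2          // R = I - C
//   ==>
//   sub  w9, wzr, w2         // NewVR = -C
//   madd w0, w0, w1, w9      // R = A * B + (-C)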
4542
/// Replace csinc-branch sequence by simple conditional branch
4543
///
4544
/// Examples:
4545
/// 1. \code
4546
///   csinc  w9, wzr, wzr, <condition code>
4547
///   tbnz   w9, #0, 0x44
4548
///    \endcode
4549
/// to
4550
///    \code
4551
///   b.<inverted condition code>
4552
///    \endcode
4553
///
4554
/// 2. \code
4555
///   csinc w9, wzr, wzr, <condition code>
4556
///   tbz   w9, #0, 0x44
4557
///    \endcode
4558
/// to
4559
///    \code
4560
///   b.<condition code>
4561
///    \endcode
4562
///
4563
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4564
/// compare's constant operand is power of 2.
4565
///
4566
/// Examples:
4567
///    \code
4568
///   and  w8, w8, #0x400
4569
///   cbnz w8, L1
4570
///    \endcode
4571
/// to
4572
///    \code
4573
///   tbnz w8, #10, L1
4574
///    \endcode
4575
///
4576
/// \param  MI Conditional Branch
4577
/// \return True when the simple conditional branch is generated
4578
///
4579
1.02M
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4580
1.02M
  bool IsNegativeBranch = false;
4581
1.02M
  bool IsTestAndBranch = false;
4582
1.02M
  unsigned TargetBBInMI = 0;
4583
1.02M
  switch (MI.getOpcode()) {
4584
1.02M
  default:
4585
0
    llvm_unreachable("Unknown branch instruction?");
4586
1.02M
  case AArch64::Bcc:
4587
436k
    return false;
4588
1.02M
  case AArch64::CBZW:
4589
210k
  case AArch64::CBZX:
4590
210k
    TargetBBInMI = 1;
4591
210k
    break;
4592
239k
  case AArch64::CBNZW:
4593
239k
  case AArch64::CBNZX:
4594
239k
    TargetBBInMI = 1;
4595
239k
    IsNegativeBranch = true;
4596
239k
    break;
4597
239k
  case AArch64::TBZW:
4598
9.06k
  case AArch64::TBZX:
4599
9.06k
    TargetBBInMI = 2;
4600
9.06k
    IsTestAndBranch = true;
4601
9.06k
    break;
4602
124k
  case AArch64::TBNZW:
4603
124k
  case AArch64::TBNZX:
4604
124k
    TargetBBInMI = 2;
4605
124k
    IsNegativeBranch = true;
4606
124k
    IsTestAndBranch = true;
4607
124k
    break;
4608
584k
  }
4609
584k
  // So we increment a zero register and test for bits other
4610
584k
  // than bit 0? Conservatively bail out in case the verifier
4611
584k
  // missed this case.
4612
584k
  if (IsTestAndBranch && MI.getOperand(1).getImm())
4613
10.3k
    return false;
4614
573k
4615
573k
  // Find Definition.
4616
573k
  assert(MI.getParent() && "Incomplete machine instruction\n");
4617
573k
  MachineBasicBlock *MBB = MI.getParent();
4618
573k
  MachineFunction *MF = MBB->getParent();
4619
573k
  MachineRegisterInfo *MRI = &MF->getRegInfo();
4620
573k
  unsigned VReg = MI.getOperand(0).getReg();
4621
573k
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
4622
0
    return false;
4623
573k
4624
573k
  MachineInstr *DefMI = MRI->getVRegDef(VReg);
4625
573k
4626
573k
  // Look through COPY instructions to find definition.
4627
586k
  while (DefMI->isCopy()) {
4628
194k
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
4629
194k
    if (!MRI->hasOneNonDBGUse(CopyVReg))
4630
160k
      return false;
4631
33.3k
    if (!MRI->hasOneDef(CopyVReg))
4632
20.7k
      return false;
4633
12.6k
    DefMI = MRI->getVRegDef(CopyVReg);
4634
12.6k
  }
4635
573k
4636
573k
  switch (DefMI->getOpcode()) {
4637
392k
  default:
4638
330k
    return false;
4639
392k
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4640
392k
  case AArch64::ANDWri:
4641
60
  case AArch64::ANDXri: {
4642
60
    if (IsTestAndBranch)
4643
0
      return false;
4644
60
    if (DefMI->getParent() != MBB)
4645
11
      return false;
4646
49
    if (!MRI->hasOneNonDBGUse(VReg))
4647
7
      return false;
4648
42
4649
42
    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4650
42
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4651
42
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4652
42
    if (!isPowerOf2_64(Mask))
4653
21
      return false;
4654
21
4655
21
    MachineOperand &MO = DefMI->getOperand(1);
4656
21
    unsigned NewReg = MO.getReg();
4657
21
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4658
0
      return false;
4659
21
4660
21
    assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4661
21
4662
21
    MachineBasicBlock &RefToMBB = *MBB;
4663
21
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4664
21
    DebugLoc DL = MI.getDebugLoc();
4665
21
    unsigned Imm = Log2_64(Mask);
4666
21
    unsigned Opc = (Imm < 32)
4667
21
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4668
21
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4669
21
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4670
21
                              .addReg(NewReg)
4671
21
                              .addImm(Imm)
4672
21
                              .addMBB(TBB);
4673
21
    // Register lives on to the CBZ now.
4674
21
    MO.setIsKill(false);
4675
21
4676
21
    // For immediates smaller than 32, we need to use the 32-bit
4677
21
    // variant (W) in all cases. Indeed, the 64-bit variant cannot
4678
21
    // encode them.
4679
21
    // Therefore, if the input register is 64-bit, we need to take the
4680
21
    // 32-bit sub-part.
4681
21
    if (!Is32Bit && Imm < 32)
4682
3
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4683
21
    MI.eraseFromParent();
4684
21
    return true;
4685
21
  }
4686
21
  // Look for CSINC
4687
61.2k
  case AArch64::CSINCWr:
4688
61.2k
  case AArch64::CSINCXr: {
4689
61.2k
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4690
61.2k
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4691
61.2k
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4692
13
          DefMI->getOperand(2).getReg() == AArch64::XZR))
4693
13
      return false;
4694
61.2k
4695
61.2k
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4696
0
      return false;
4697
61.2k
4698
61.2k
    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4699
61.2k
    // Convert only when the condition code is not modified between
4700
61.2k
    // the CSINC and the branch. The CC may be used by other
4701
61.2k
    // instructions in between.
4702
61.2k
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4703
8.82k
      return false;
4704
52.3k
    MachineBasicBlock &RefToMBB = *MBB;
4705
52.3k
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4706
52.3k
    DebugLoc DL = MI.getDebugLoc();
4707
52.3k
    if (IsNegativeBranch)
4708
52.2k
      CC = AArch64CC::getInvertedCondCode(CC);
4709
52.3k
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4710
52.3k
    MI.eraseFromParent();
4711
52.3k
    return true;
4712
52.3k
  }
4713
392k
  }
4714
392k
}
4715
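// The AND-to-TB(N)Z fold above only fires when the decoded mask is a power of
// two; the bit to test is then its base-2 logarithm. A minimal sketch using
// the same MathExtras helpers (the mask value is an assumed example):
//
//   uint64_t Mask = 0x400;            // decoded ANDWri/ANDXri immediate
//   if (isPowerOf2_64(Mask)) {
//     unsigned Imm = Log2_64(Mask);   // 10, i.e. "tbnz w8, #10, L1"
//   }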
4716
std::pair<unsigned, unsigned>
4717
5.52k
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4718
5.52k
  const unsigned Mask = AArch64II::MO_FRAGMENT;
4719
5.52k
  return std::make_pair(TF & Mask, TF & ~Mask);
4720
5.52k
}
4721
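// A small usage sketch (the flag combination is an assumed example, not taken
// from the file): MO_FRAGMENT masks off the addressing fragment, leaving the
// remaining bitmask flags in the second element.
//
//   unsigned TF = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
//   auto Split = TII.decomposeMachineOperandsTargetFlags(TF);
//   // Split.first == AArch64II::MO_PAGEOFF, Split.second == AArch64II::MO_NC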
4722
ArrayRef<std::pair<unsigned, const char *>>
4723
5.53k
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4724
5.53k
  using namespace AArch64II;
4725
5.53k
4726
5.53k
  static const std::pair<unsigned, const char *> TargetFlags[] = {
4727
5.53k
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4728
5.53k
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
4729
5.53k
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
4730
5.53k
      {MO_HI12, "aarch64-hi12"}};
4731
5.53k
  return makeArrayRef(TargetFlags);
4732
5.53k
}
4733
4734
ArrayRef<std::pair<unsigned, const char *>>
4735
2.78k
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4736
2.78k
  using namespace AArch64II;
4737
2.78k
4738
2.78k
  static const std::pair<unsigned, const char *> TargetFlags[] = {
4739
2.78k
      {MO_COFFSTUB, "aarch64-coffstub"},
4740
2.78k
      {MO_GOT, "aarch64-got"},   {MO_NC, "aarch64-nc"},
4741
2.78k
      {MO_S, "aarch64-s"},       {MO_TLS, "aarch64-tls"},
4742
2.78k
      {MO_DLLIMPORT, "aarch64-dllimport"}};
4743
2.78k
  return makeArrayRef(TargetFlags);
4744
2.78k
}
4745
4746
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4747
26
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4748
26
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4749
26
      {{MOSuppressPair, "aarch64-suppress-pair"},
4750
26
       {MOStridedAccess, "aarch64-strided-access"}};
4751
26
  return makeArrayRef(TargetFlags);
4752
26
}
4753
4754
/// Constants defining how certain sequences should be outlined.
4755
/// This encompasses how an outlined function should be called, and what kind of
4756
/// frame should be emitted for that outlined function.
4757
///
4758
/// \p MachineOutlinerDefault implies that the function should be called with
4759
/// a save and restore of LR to the stack.
4760
///
4761
/// That is,
4762
///
4763
/// I1     Save LR                    OUTLINED_FUNCTION:
4764
/// I2 --> BL OUTLINED_FUNCTION       I1
4765
/// I3     Restore LR                 I2
4766
///                                   I3
4767
///                                   RET
4768
///
4769
/// * Call construction overhead: 3 (save + BL + restore)
4770
/// * Frame construction overhead: 1 (ret)
4771
/// * Requires stack fixups? Yes
4772
///
4773
/// \p MachineOutlinerTailCall implies that the function is being created from
4774
/// a sequence of instructions ending in a return.
4775
///
4776
/// That is,
4777
///
4778
/// I1                             OUTLINED_FUNCTION:
4779
/// I2 --> B OUTLINED_FUNCTION     I1
4780
/// RET                            I2
4781
///                                RET
4782
///
4783
/// * Call construction overhead: 1 (B)
4784
/// * Frame construction overhead: 0 (Return included in sequence)
4785
/// * Requires stack fixups? No
4786
///
4787
/// \p MachineOutlinerNoLRSave implies that the function should be called using
4788
/// a BL instruction, but doesn't require LR to be saved and restored. This
4789
/// happens when LR is known to be dead.
4790
///
4791
/// That is,
4792
///
4793
/// I1                                OUTLINED_FUNCTION:
4794
/// I2 --> BL OUTLINED_FUNCTION       I1
4795
/// I3                                I2
4796
///                                   I3
4797
///                                   RET
4798
///
4799
/// * Call construction overhead: 1 (BL)
4800
/// * Frame construction overhead: 1 (RET)
4801
/// * Requires stack fixups? No
4802
///
4803
/// \p MachineOutlinerThunk implies that the function is being created from
4804
/// a sequence of instructions ending in a call. The outlined function is
4805
/// called with a BL instruction, and the outlined function tail-calls the
4806
/// original call destination.
4807
///
4808
/// That is,
4809
///
4810
/// I1                                OUTLINED_FUNCTION:
4811
/// I2 --> BL OUTLINED_FUNCTION       I1
4812
/// BL f                              I2
4813
///                                   B f
4814
/// * Call construction overhead: 1 (BL)
4815
/// * Frame construction overhead: 0
4816
/// * Requires stack fixups? No
4817
///
4818
/// \p MachineOutlinerRegSave implies that the function should be called with a
4819
/// save and restore of LR to an available register. This allows us to avoid
4820
/// stack fixups. Note that this outlining variant is compatible with the
4821
/// NoLRSave case.
4822
///
4823
/// That is,
4824
///
4825
/// I1     Save LR                    OUTLINED_FUNCTION:
4826
/// I2 --> BL OUTLINED_FUNCTION       I1
4827
/// I3     Restore LR                 I2
4828
///                                   I3
4829
///                                   RET
4830
///
4831
/// * Call construction overhead: 3 (save + BL + restore)
4832
/// * Frame construction overhead: 1 (ret)
4833
/// * Requires stack fixups? No
4834
enum MachineOutlinerClass {
4835
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
4836
  MachineOutlinerTailCall, /// Only emit a branch.
4837
  MachineOutlinerNoLRSave, /// Emit a call and return.
4838
  MachineOutlinerThunk,    /// Emit a call and tail-call.
4839
  MachineOutlinerRegSave   /// Same as default, but save to a register.
4840
};
4841
4842
enum MachineOutlinerMBBFlags {
4843
  LRUnavailableSomewhere = 0x2,
4844
  HasCalls = 0x4,
4845
  UnsafeRegsDead = 0x8
4846
};
4847
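// A rough cost illustration for the variants documented above, assuming a
// 12-byte candidate (three 4-byte instructions) that occurs three times:
//
//   MachineOutlinerTailCall: 3 calls * 4B + 12B body + 0B frame = 24B,
//                            versus 36B inline -- profitable.
//   MachineOutlinerDefault:  3 calls * 12B + 12B body + 4B frame = 52B,
//                            versus 36B inline -- not profitable here.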
4848
unsigned
4849
145
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4850
145
  assert(C.LRUWasSet && "LRU wasn't set?");
4851
145
  MachineFunction *MF = C.getMF();
4852
145
  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4853
145
      MF->getSubtarget().getRegisterInfo());
4854
145
4855
145
  // Check if there is an available register across the sequence that we can
4856
145
  // use.
4857
1.22k
  for (unsigned Reg : AArch64::GPR64RegClass) {
4858
1.22k
    if (!ARI->isReservedReg(*MF, Reg) &&
4859
1.22k
        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
4860
1.22k
        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4861
1.22k
        Reg != AArch64::X17 && // Ditto for X17.
4862
1.22k
        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4863
111
      return Reg;
4864
1.22k
  }
4865
145
4866
145
  // No suitable register. Return 0.
4867
145
  return 0u;
4868
145
}
4869
4870
outliner::OutlinedFunction
4871
AArch64InstrInfo::getOutliningCandidateInfo(
4872
228
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4873
228
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4874
228
  unsigned SequenceSize =
4875
228
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4876
1.17k
                      [this](unsigned Sum, const MachineInstr &MI) {
4877
1.17k
                        return Sum + getInstSizeInBytes(MI);
4878
1.17k
                      });
4879
228
4880
228
  // Properties about candidate MBBs that hold for all of them.
4881
228
  unsigned FlagsSetInAll = 0xF;
4882
228
4883
228
  // Compute liveness information for each candidate, and set FlagsSetInAll.
4884
228
  const TargetRegisterInfo &TRI = getRegisterInfo();
4885
228
  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4886
575
                [&FlagsSetInAll](outliner::Candidate &C) {
4887
575
                  FlagsSetInAll &= C.Flags;
4888
575
                });
4889
228
4890
228
  // According to the AArch64 Procedure Call Standard, the following are
4891
228
  // undefined on entry/exit from a function call:
4892
228
  //
4893
228
  // * Registers x16, x17, (and thus w16, w17)
4894
228
  // * Condition codes (and thus the NZCV register)
4895
228
  //
4896
228
  // Because of this, we can't outline any sequence of instructions where one
4897
228
  // of these registers is live into/across it. Thus, we need to delete those
4898
228
  // candidates.
4901
228
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4902
88
    // If the unsafe registers in this block are all dead, then we don't need
4903
88
    // to compute liveness here.
4904
88
    if (C.Flags & UnsafeRegsDead)
4905
6
      return false;
4906
82
    C.initLRU(TRI);
4907
82
    LiveRegUnits LRU = C.LRU;
4908
82
    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4909
82
            !LRU.available(AArch64::NZCV));
4910
82
  };
4911
228
4912
228
  // Are there any candidates where those registers are live?
4913
228
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
4914
34
    // Erase every candidate that violates the restrictions above. (It could be
4915
34
    // true that we have viable candidates, so it's not worth bailing out in
4916
34
    // the case that, say, 1 out of 20 candidates violates the restrictions.)
4917
34
    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4918
34
                                              RepeatedSequenceLocs.end(),
4919
34
                                              CantGuaranteeValueAcrossCall),
4920
34
                               RepeatedSequenceLocs.end());
4921
34
4922
34
    // If the sequence doesn't have enough candidates left, then we're done.
4923
34
    if (RepeatedSequenceLocs.size() < 2)
4924
0
      return outliner::OutlinedFunction();
4925
228
  }
4926
228
4927
228
  // At this point, we have only "safe" candidates to outline. Figure out
4928
228
  // frame + call instruction information.
4929
228
4930
228
  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4931
228
4932
228
  // Helper lambda which sets call information for every candidate.
4933
228
  auto SetCandidateCallInfo =
4934
228
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4935
147
        for (outliner::Candidate &C : RepeatedSequenceLocs)
4936
333
          C.setCallInfo(CallID, NumBytesForCall);
4937
147
      };
4938
228
4939
228
  unsigned FrameID = MachineOutlinerDefault;
4940
228
  unsigned NumBytesToCreateFrame = 4;
4941
228
4942
567
  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4943
567
    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4944
567
  });
4945
228
4946
228
  // Returns true if an instructions is safe to fix up, false otherwise.
4947
1.06k
  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
4948
1.06k
    if (MI.isCall())
4949
35
      return true;
4950
1.03k
4951
1.03k
    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
4952
1.03k
        !MI.readsRegister(AArch64::SP, &TRI))
4953
609
      return true;
4954
424
4955
424
    // Any modification of SP will break our code to save/restore LR.
4956
424
    // FIXME: We could handle some instructions which add a constant
4957
424
    // offset to SP, with a bit more work.
4958
424
    if (MI.modifiesRegister(AArch64::SP, &TRI))
4959
86
      return false;
4960
338
4961
338
    // At this point, we have a stack instruction that we might need to
4962
338
    // fix up. We'll handle it if it's a load or store.
4963
338
    if (MI.mayLoadOrStore()) {
4964
317
      const MachineOperand *Base; // Filled with the base operand of MI.
4965
317
      int64_t Offset;             // Filled with the offset of MI.
4966
317
4967
317
      // Does it allow us to offset the base operand and is the base the
4968
317
      // register SP?
4969
317
      if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
4970
317
          Base->getReg() != AArch64::SP)
4971
0
        return false;
4972
317
4973
317
      // Find the minimum/maximum offset for this instruction and check
4974
317
      // if fixing it up would be in range.
4975
317
      int64_t MinOffset,
4976
317
          MaxOffset;  // Unscaled offsets for the instruction.
4977
317
      unsigned Scale; // The scale to multiply the offsets by.
4978
317
      unsigned DummyWidth;
4979
317
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
4980
317
4981
317
      Offset += 16; // Update the offset to what it would be if we outlined.
4982
317
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
4983
3
        return false;
4984
314
4985
314
      // It's in range, so we can outline it.
4986
314
      return true;
4987
314
    }
4988
21
4989
21
    // FIXME: Add handling for instructions like "add x0, sp, #8".
4990
21
4991
21
    // We can't fix it up, so don't outline it.
4992
21
    return false;
4993
21
  };
4994
228
4995
228
  // True if it's possible to fix up each stack instruction in this sequence.
4996
228
  // Important for frames/call variants that modify the stack.
4997
228
  bool AllStackInstrsSafe = std::all_of(
4998
228
      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
4999
228
5000
228
  // If the last instruction in any candidate is a terminator, then we should
5001
228
  // tail call all of the candidates.
5002
228
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5003
123
    FrameID = MachineOutlinerTailCall;
5004
123
    NumBytesToCreateFrame = 0;
5005
123
    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5006
123
  }
5007
105
5008
105
  else if (LastInstrOpcode == AArch64::BL ||
5009
105
           (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5010
22
    // FIXME: Do we need to check if the code after this uses the value of LR?
5011
22
    FrameID = MachineOutlinerThunk;
5012
22
    NumBytesToCreateFrame = 0;
5013
22
    SetCandidateCallInfo(MachineOutlinerThunk, 4);
5014
22
  }
5015
83
5016
83
  else {
5017
83
    // We need to decide how to emit calls + frames. We can always emit the same
5018
83
    // frame if we don't need to save to the stack. If we have to save to the
5019
83
    // stack, then we need a different frame.
5020
83
    unsigned NumBytesNoStackCalls = 0;
5021
83
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5022
83
5023
247
    for (outliner::Candidate &C : RepeatedSequenceLocs) {
5024
247
      C.initLRU(TRI);
5025
247
5026
247
      // Is LR available? If so, we don't need a save.
5027
247
      if (C.LRU.available(AArch64::LR)) {
5028
115
        NumBytesNoStackCalls += 4;
5029
115
        C.setCallInfo(MachineOutlinerNoLRSave, 4);
5030
115
        CandidatesWithoutStackFixups.push_back(C);
5031
115
      }
5032
132
5033
132
      // Is an unused register available? If so, we won't modify the stack, so
5034
132
      // we can outline with the same frame type as those that don't save LR.
5035
132
      else if (findRegisterToSaveLRTo(C)) {
5036
98
        NumBytesNoStackCalls += 12;
5037
98
        C.setCallInfo(MachineOutlinerRegSave, 12);
5038
98
        CandidatesWithoutStackFixups.push_back(C);
5039
98
      }
5040
34
5041
34
      // Is SP used in the sequence at all? If not, we don't have to modify
5042
34
      // the stack, so we are guaranteed to get the same frame.
5043
34
      else if (C.UsedInSequence.available(AArch64::SP)) {
5044
18
        NumBytesNoStackCalls += 12;
5045
18
        C.setCallInfo(MachineOutlinerDefault, 12);
5046
18
        CandidatesWithoutStackFixups.push_back(C);
5047
18
      }
5048
16
5049
16
      // If we outline this, we need to modify the stack. Pretend we don't
5050
16
      // outline this by saving all of its bytes.
5051
16
      else {
5052
16
        NumBytesNoStackCalls += SequenceSize;
5053
16
      }
5054
247
    }
5055
83
5056
83
    // If there are no places where we have to save LR, then note that we
5057
83
    // don't have to update the stack. Otherwise, give every candidate the
5058
83
    // default call type, as long as it's safe to do so.
5059
83
    if (!AllStackInstrsSafe ||
5060
83
        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5061
81
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5062
81
      FrameID = MachineOutlinerNoLRSave;
5063
81
    } else {
5064
2
      SetCandidateCallInfo(MachineOutlinerDefault, 12);
5065
2
    }
5066
83
5067
83
    // If we dropped all of the candidates, bail out here.
5068
83
    if (RepeatedSequenceLocs.size() < 2) {
5069
2
      RepeatedSequenceLocs.clear();
5070
2
      return outliner::OutlinedFunction();
5071
2
    }
5072
226
  }
5073
226
5074
226
  // Does every candidate's MBB contain a call? If so, then we might have a call
5075
226
  // in the range.
5076
226
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5077
45
    // Check if the range contains a call. These require a save + restore of the
5078
45
    // link register.
5079
45
    bool ModStackToSaveLR = false;
5080
45
    if (std::any_of(FirstCand.front(), FirstCand.back(),
5081
107
                    [](const MachineInstr &MI) { return MI.isCall(); }))
5082
7
      ModStackToSaveLR = true;
5083
38
5084
38
    // Handle the last instruction separately. If this is a tail call, then the
5085
38
    // last instruction is a call. We don't want to save + restore in this case.
5086
38
    // However, it could be possible that the last instruction is a call without
5087
38
    // it being valid to tail call this sequence. We should consider this as
5088
38
    // well.
5089
38
    else if (FrameID != MachineOutlinerThunk &&
5090
38
             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5091
1
      ModStackToSaveLR = true;
5092
45
5093
45
    if (ModStackToSaveLR) {
5094
8
      // We can't fix up the stack. Bail out.
5095
8
      if (!AllStackInstrsSafe) {
5096
0
        RepeatedSequenceLocs.clear();
5097
0
        return outliner::OutlinedFunction();
5098
0
      }
5099
8
5100
8
      // Save + restore LR.
5101
8
      NumBytesToCreateFrame += 8;
5102
8
    }
5103
45
  }
5104
226
5105
226
  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5106
226
                                    NumBytesToCreateFrame, FrameID);
5107
226
}
5108
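// A worked instance of the NumBytesNoStackCalls heuristic above (candidate
// counts and costs are assumed): with three candidates, keeping the default
// (stack-fixup) calls costs 3 * 12 = 36 bytes. If the no-stack-fixup variants
// total, say, 4 + 12 + 12 = 28 <= 36 bytes, the candidates that avoid stack
// fixups are kept and FrameID becomes MachineOutlinerNoLRSave.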
5109
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5110
156
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5111
156
  const Function &F = MF.getFunction();
5112
156
5113
156
  // Can F be deduplicated by the linker? If it can, don't outline from it.
5114
156
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5115
3
    return false;
5116
153
5117
153
  // Don't outline from functions with section markings; the program could
5118
153
  // expect that all the code is in the named section.
5119
153
  // FIXME: Allow outlining from multiple functions with the same section
5120
153
  // marking.
5121
153
  if (F.hasSection())
5122
4
    return false;
5123
149
5124
149
  // Outlining from functions with redzones is unsafe since the outliner may
5125
149
  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5126
149
  // outline from it.
5127
149
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5128
149
  if (!AFI || AFI->hasRedZone().getValueOr(true))
5129
1
    return false;
5130
148
5131
148
  // It's safe to outline from MF.
5132
148
  return true;
5133
148
}
5134
5135
bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5136
208
                                              unsigned &Flags) const {
5137
208
  // Check if LR is available through all of the MBB. If it's not, then set
5138
208
  // a flag.
5139
208
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5140
208
         "Suitable Machine Function for outlining must track liveness");
5141
208
  LiveRegUnits LRU(getRegisterInfo());
5142
208
5143
208
  std::for_each(MBB.rbegin(), MBB.rend(),
5144
1.57k
                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5145
208
5146
208
  // Check if each of the unsafe registers are available...
5147
208
  bool W16AvailableInBlock = LRU.available(AArch64::W16);
5148
208
  bool W17AvailableInBlock = LRU.available(AArch64::W17);
5149
208
  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5150
208
5151
208
  // If all of these are dead (and not live out), we know we don't have to check
5152
208
  // them later.
5153