Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains the Base ARM implementation of the TargetInstrInfo class.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "ARMBaseInstrInfo.h"
15
#include "ARMBaseRegisterInfo.h"
16
#include "ARMConstantPoolValue.h"
17
#include "ARMFeatures.h"
18
#include "ARMHazardRecognizer.h"
19
#include "ARMMachineFunctionInfo.h"
20
#include "ARMSubtarget.h"
21
#include "MCTargetDesc/ARMAddressingModes.h"
22
#include "MCTargetDesc/ARMBaseInfo.h"
23
#include "llvm/ADT/DenseMap.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallSet.h"
26
#include "llvm/ADT/SmallVector.h"
27
#include "llvm/ADT/Triple.h"
28
#include "llvm/CodeGen/LiveVariables.h"
29
#include "llvm/CodeGen/MachineBasicBlock.h"
30
#include "llvm/CodeGen/MachineConstantPool.h"
31
#include "llvm/CodeGen/MachineFrameInfo.h"
32
#include "llvm/CodeGen/MachineFunction.h"
33
#include "llvm/CodeGen/MachineInstr.h"
34
#include "llvm/CodeGen/MachineInstrBuilder.h"
35
#include "llvm/CodeGen/MachineMemOperand.h"
36
#include "llvm/CodeGen/MachineOperand.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
39
#include "llvm/CodeGen/SelectionDAGNodes.h"
40
#include "llvm/CodeGen/TargetSchedule.h"
41
#include "llvm/IR/Attributes.h"
42
#include "llvm/IR/Constants.h"
43
#include "llvm/IR/DebugLoc.h"
44
#include "llvm/IR/Function.h"
45
#include "llvm/IR/GlobalValue.h"
46
#include "llvm/MC/MCAsmInfo.h"
47
#include "llvm/MC/MCInstrDesc.h"
48
#include "llvm/MC/MCInstrItineraries.h"
49
#include "llvm/Support/BranchProbability.h"
50
#include "llvm/Support/Casting.h"
51
#include "llvm/Support/CommandLine.h"
52
#include "llvm/Support/Compiler.h"
53
#include "llvm/Support/Debug.h"
54
#include "llvm/Support/ErrorHandling.h"
55
#include "llvm/Support/raw_ostream.h"
56
#include "llvm/Target/TargetInstrInfo.h"
57
#include "llvm/Target/TargetMachine.h"
58
#include "llvm/Target/TargetRegisterInfo.h"
59
#include <algorithm>
60
#include <cassert>
61
#include <cstdint>
62
#include <iterator>
63
#include <new>
64
#include <utility>
65
#include <vector>
66
67
using namespace llvm;
68
69
#define DEBUG_TYPE "arm-instrinfo"
70
71
#define GET_INSTRINFO_CTOR_DTOR
72
#include "ARMGenInstrInfo.inc"
73
74
static cl::opt<bool>
75
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76
               cl::desc("Enable ARM 2-addr to 3-addr conv"));
77
78
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
79
struct ARM_MLxEntry {
80
  uint16_t MLxOpc;     // MLA / MLS opcode
81
  uint16_t MulOpc;     // Expanded multiplication opcode
82
  uint16_t AddSubOpc;  // Expanded add / sub opcode
83
  bool NegAcc;         // True if the acc is negated before the add / sub.
84
  bool HasLane;        // True if instruction has an extra "lane" operand.
85
};
86
87
static const ARM_MLxEntry ARM_MLxTable[] = {
88
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
89
  // fp scalar ops
90
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
91
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
92
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
93
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
94
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
95
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
96
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
97
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },
98
99
  // fp SIMD ops
100
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
101
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
102
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
103
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
104
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
105
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
106
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
107
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
108
};
109
110
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112
6.10k
    Subtarget(STI) {
113
103k
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); 
i != e103k
;
++i97.6k
) {
114
97.6k
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115
0
      llvm_unreachable("Duplicated entries?");
116
97.6k
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117
97.6k
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118
97.6k
  }
119
6.10k
}
120
121
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122
// currently defaults to no prepass hazard recognizer.
123
ScheduleHazardRecognizer *
124
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125
34.7k
                                               const ScheduleDAG *DAG) const {
126
34.7k
  if (
usePreRAHazardRecognizer()34.7k
) {
127
34.7k
    const InstrItineraryData *II =
128
34.7k
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129
34.7k
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130
34.7k
  }
131
0
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132
0
}
133
134
ScheduleHazardRecognizer *ARMBaseInstrInfo::
135
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136
12.5k
                                   const ScheduleDAG *DAG) const {
137
12.5k
  if (
Subtarget.isThumb2() || 12.5k
Subtarget.hasVFP2()6.04k
)
138
11.1k
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139
1.46k
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140
1.46k
}
141
142
MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143
0
    MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144
0
  // FIXME: Thumb2 support.
145
0
146
0
  if (!EnableARM3Addr)
147
0
    return nullptr;
148
0
149
0
  MachineFunction &MF = *MI.getParent()->getParent();
150
0
  uint64_t TSFlags = MI.getDesc().TSFlags;
151
0
  bool isPre = false;
152
0
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153
0
  default: return nullptr;
154
0
  case ARMII::IndexModePre:
155
0
    isPre = true;
156
0
    break;
157
0
  case ARMII::IndexModePost:
158
0
    break;
159
0
  }
160
0
161
0
  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162
0
  // operation.
163
0
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164
0
  if (MemOpc == 0)
165
0
    return nullptr;
166
0
167
0
  MachineInstr *UpdateMI = nullptr;
168
0
  MachineInstr *MemMI = nullptr;
169
0
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170
0
  const MCInstrDesc &MCID = MI.getDesc();
171
0
  unsigned NumOps = MCID.getNumOperands();
172
0
  bool isLoad = !MI.mayStore();
173
0
  const MachineOperand &WB = isLoad ? 
MI.getOperand(1)0
:
MI.getOperand(0)0
;
174
0
  const MachineOperand &Base = MI.getOperand(2);
175
0
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176
0
  unsigned WBReg = WB.getReg();
177
0
  unsigned BaseReg = Base.getReg();
178
0
  unsigned OffReg = Offset.getReg();
179
0
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180
0
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181
0
  switch (AddrMode) {
182
0
  
default: 0
llvm_unreachable0
("Unknown indexed op!");
183
0
  case ARMII::AddrMode2: {
184
0
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185
0
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186
0
    if (
OffReg == 00
) {
187
0
      if (ARM_AM::getSOImmVal(Amt) == -1)
188
0
        // Can't encode it in a so_imm operand. This transformation will
189
0
        // add more than 1 instruction. Abandon!
190
0
        return nullptr;
191
0
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192
0
                         get(isSub ? 
ARM::SUBri0
:
ARM::ADDri0
), WBReg)
193
0
                     .addReg(BaseReg)
194
0
                     .addImm(Amt)
195
0
                     .add(predOps(Pred))
196
0
                     .add(condCodeOp());
197
0
    } else 
if (0
Amt != 00
) {
198
0
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199
0
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200
0
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201
0
                         get(isSub ? 
ARM::SUBrsi0
:
ARM::ADDrsi0
), WBReg)
202
0
                     .addReg(BaseReg)
203
0
                     .addReg(OffReg)
204
0
                     .addReg(0)
205
0
                     .addImm(SOOpc)
206
0
                     .add(predOps(Pred))
207
0
                     .add(condCodeOp());
208
0
    } else
209
0
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210
0
                         get(isSub ? 
ARM::SUBrr0
:
ARM::ADDrr0
), WBReg)
211
0
                     .addReg(BaseReg)
212
0
                     .addReg(OffReg)
213
0
                     .add(predOps(Pred))
214
0
                     .add(condCodeOp());
215
0
    break;
216
0
  }
217
0
  case ARMII::AddrMode3 : {
218
0
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219
0
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220
0
    if (OffReg == 0)
221
0
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222
0
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223
0
                         get(isSub ? 
ARM::SUBri0
:
ARM::ADDri0
), WBReg)
224
0
                     .addReg(BaseReg)
225
0
                     .addImm(Amt)
226
0
                     .add(predOps(Pred))
227
0
                     .add(condCodeOp());
228
0
    else
229
0
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230
0
                         get(isSub ? 
ARM::SUBrr0
:
ARM::ADDrr0
), WBReg)
231
0
                     .addReg(BaseReg)
232
0
                     .addReg(OffReg)
233
0
                     .add(predOps(Pred))
234
0
                     .add(condCodeOp());
235
0
    break;
236
0
  }
237
0
  }
238
0
239
0
  std::vector<MachineInstr*> NewMIs;
240
0
  if (
isPre0
) {
241
0
    if (isLoad)
242
0
      MemMI =
243
0
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244
0
              .addReg(WBReg)
245
0
              .addImm(0)
246
0
              .addImm(Pred);
247
0
    else
248
0
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249
0
                  .addReg(MI.getOperand(1).getReg())
250
0
                  .addReg(WBReg)
251
0
                  .addReg(0)
252
0
                  .addImm(0)
253
0
                  .addImm(Pred);
254
0
    NewMIs.push_back(MemMI);
255
0
    NewMIs.push_back(UpdateMI);
256
0
  } else {
257
0
    if (isLoad)
258
0
      MemMI =
259
0
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260
0
              .addReg(BaseReg)
261
0
              .addImm(0)
262
0
              .addImm(Pred);
263
0
    else
264
0
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265
0
                  .addReg(MI.getOperand(1).getReg())
266
0
                  .addReg(BaseReg)
267
0
                  .addReg(0)
268
0
                  .addImm(0)
269
0
                  .addImm(Pred);
270
0
    if (WB.isDead())
271
0
      UpdateMI->getOperand(0).setIsDead();
272
0
    NewMIs.push_back(UpdateMI);
273
0
    NewMIs.push_back(MemMI);
274
0
  }
275
0
276
0
  // Transfer LiveVariables states, kill / dead info.
277
0
  if (
LV0
) {
278
0
    for (unsigned i = 0, e = MI.getNumOperands(); 
i != e0
;
++i0
) {
279
0
      MachineOperand &MO = MI.getOperand(i);
280
0
      if (
MO.isReg() && 0
TargetRegisterInfo::isVirtualRegister(MO.getReg())0
) {
281
0
        unsigned Reg = MO.getReg();
282
0
283
0
        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284
0
        if (
MO.isDef()0
) {
285
0
          MachineInstr *NewMI = (Reg == WBReg) ? 
UpdateMI0
:
MemMI0
;
286
0
          if (MO.isDead())
287
0
            LV->addVirtualRegisterDead(Reg, *NewMI);
288
0
        }
289
0
        if (
MO.isUse() && 0
MO.isKill()0
) {
290
0
          for (unsigned j = 0; 
j < 20
;
++j0
) {
291
0
            // Look at the two new MI's in reverse order.
292
0
            MachineInstr *NewMI = NewMIs[j];
293
0
            if (!NewMI->readsRegister(Reg))
294
0
              continue;
295
0
            LV->addVirtualRegisterKilled(Reg, *NewMI);
296
0
            if (VI.removeKill(MI))
297
0
              VI.Kills.push_back(NewMI);
298
0
            break;
299
0
          }
300
0
        }
301
0
      }
302
0
    }
303
0
  }
304
0
305
0
  MachineBasicBlock::iterator MBBI = MI.getIterator();
306
0
  MFI->insert(MBBI, NewMIs[1]);
307
0
  MFI->insert(MBBI, NewMIs[0]);
308
0
  return NewMIs[0];
309
0
}
310
311
// Branch analysis.
312
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313
                                     MachineBasicBlock *&TBB,
314
                                     MachineBasicBlock *&FBB,
315
                                     SmallVectorImpl<MachineOperand> &Cond,
316
1.73M
                                     bool AllowModify) const {
317
1.73M
  TBB = nullptr;
318
1.73M
  FBB = nullptr;
319
1.73M
320
1.73M
  MachineBasicBlock::iterator I = MBB.end();
321
1.73M
  if (I == MBB.begin())
322
11.8k
    return false; // Empty blocks are easy.
323
1.72M
  --I;
324
1.72M
325
1.72M
  // Walk backwards from the end of the basic block until the branch is
326
1.72M
  // analyzed or we give up.
327
2.91M
  while (
isPredicated(*I) || 2.91M
I->isTerminator()1.98M
||
I->isDebugValue()1.42M
) {
328
1.48M
    // Flag to be raised on unanalyzeable instructions. This is useful in cases
329
1.48M
    // where we want to clean up on the end of the basic block before we bail
330
1.48M
    // out.
331
1.48M
    bool CantAnalyze = false;
332
1.48M
333
1.48M
    // Skip over DEBUG values and predicated nonterminators.
334
1.73M
    while (
I->isDebugValue() || 1.73M
!I->isTerminator()1.72M
) {
335
279k
      if (I == MBB.begin())
336
36.5k
        return false;
337
242k
      --I;
338
242k
    }
339
1.48M
340
1.45M
    
if (1.45M
isIndirectBranchOpcode(I->getOpcode()) ||
341
1.45M
        
isJumpTableBranchOpcode(I->getOpcode())1.44M
) {
342
24.9k
      // Indirect branches and jump tables can't be analyzed, but we still want
343
24.9k
      // to clean up any instructions at the tail of the basic block.
344
24.9k
      CantAnalyze = true;
345
1.45M
    } else 
if (1.42M
isUncondBranchOpcode(I->getOpcode())1.42M
) {
346
321k
      TBB = I->getOperand(0).getMBB();
347
1.42M
    } else 
if (1.10M
isCondBranchOpcode(I->getOpcode())1.10M
) {
348
882k
      // Bail out if we encounter multiple conditional branches.
349
882k
      if (!Cond.empty())
350
6.97k
        return true;
351
875k
352
882k
      assert(!FBB && "FBB should have been null.");
353
875k
      FBB = TBB;
354
875k
      TBB = I->getOperand(0).getMBB();
355
875k
      Cond.push_back(I->getOperand(1));
356
875k
      Cond.push_back(I->getOperand(2));
357
1.10M
    } else 
if (221k
I->isReturn()221k
) {
358
217k
      // Returns can't be analyzed, but we should run cleanup.
359
217k
      CantAnalyze = !isPredicated(*I);
360
221k
    } else {
361
3.71k
      // We encountered other unrecognized terminator. Bail out immediately.
362
3.71k
      return true;
363
3.71k
    }
364
1.43M
365
1.43M
    // Cleanup code - to be run for unpredicated unconditional branches and
366
1.43M
    //                returns.
367
1.43M
    
if (1.43M
!isPredicated(*I) &&
368
557k
          (isUncondBranchOpcode(I->getOpcode()) ||
369
236k
           isIndirectBranchOpcode(I->getOpcode()) ||
370
235k
           isJumpTableBranchOpcode(I->getOpcode()) ||
371
1.43M
           
I->isReturn()211k
)) {
372
557k
      // Forget any previous condition branch information - it no longer applies.
373
557k
      Cond.clear();
374
557k
      FBB = nullptr;
375
557k
376
557k
      // If we can modify the function, delete everything below this
377
557k
      // unconditional branch.
378
557k
      if (
AllowModify557k
) {
379
339k
        MachineBasicBlock::iterator DI = std::next(I);
380
339k
        while (
DI != MBB.end()339k
) {
381
6
          MachineInstr &InstToDelete = *DI;
382
6
          ++DI;
383
6
          InstToDelete.eraseFromParent();
384
6
        }
385
339k
      }
386
557k
    }
387
1.43M
388
1.43M
    if (CantAnalyze)
389
236k
      return true;
390
1.20M
391
1.20M
    
if (1.20M
I == MBB.begin()1.20M
)
392
16.9k
      return false;
393
1.18M
394
1.18M
    --I;
395
1.18M
  }
396
1.72M
397
1.72M
  // We made it past the terminators without bailing out - we must have
398
1.72M
  // analyzed this branch successfully.
399
1.42M
  return false;
400
1.73M
}
401
402
unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403
180k
                                        int *BytesRemoved) const {
404
180k
  assert(!BytesRemoved && "code size not handled");
405
180k
406
180k
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407
180k
  if (I == MBB.end())
408
2
    return 0;
409
180k
410
180k
  
if (180k
!isUncondBranchOpcode(I->getOpcode()) &&
411
105k
      !isCondBranchOpcode(I->getOpcode()))
412
4.65k
    return 0;
413
175k
414
175k
  // Remove the branch.
415
175k
  I->eraseFromParent();
416
175k
417
175k
  I = MBB.end();
418
175k
419
175k
  if (
I == MBB.begin()175k
)
return 18.21k
;
420
167k
  --I;
421
167k
  if (!isCondBranchOpcode(I->getOpcode()))
422
134k
    return 1;
423
32.7k
424
32.7k
  // Remove the branch.
425
32.7k
  I->eraseFromParent();
426
32.7k
  return 2;
427
32.7k
}
428
429
unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430
                                        MachineBasicBlock *TBB,
431
                                        MachineBasicBlock *FBB,
432
                                        ArrayRef<MachineOperand> Cond,
433
                                        const DebugLoc &DL,
434
177k
                                        int *BytesAdded) const {
435
177k
  assert(!BytesAdded && "code size not handled");
436
177k
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437
177k
  int BOpc   = !AFI->isThumbFunction()
438
177k
    ? 
ARM::B3.04k
:
(AFI->isThumb2Function() ? 174k
ARM::t2B165k
:
ARM::tB9.76k
);
439
177k
  int BccOpc = !AFI->isThumbFunction()
440
177k
    ? 
ARM::Bcc3.04k
:
(AFI->isThumb2Function() ? 174k
ARM::t2Bcc165k
:
ARM::tBcc9.76k
);
441
3.04k
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442
177k
443
177k
  // Shouldn't be a fall through.
444
177k
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
445
177k
  assert((Cond.size() == 2 || Cond.size() == 0) &&
446
177k
         "ARM branch conditions have two components!");
447
177k
448
177k
  // For conditional branches, we use addOperand to preserve CPSR flags.
449
177k
450
177k
  if (
!FBB177k
) {
451
172k
    if (
Cond.empty()172k
) { // Unconditional branch?
452
47.3k
      if (isThumb)
453
46.7k
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454
47.3k
      else
455
636
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456
47.3k
    } else
457
125k
      BuildMI(&MBB, DL, get(BccOpc))
458
125k
          .addMBB(TBB)
459
125k
          .addImm(Cond[0].getImm())
460
125k
          .add(Cond[1]);
461
172k
    return 1;
462
172k
  }
463
5.42k
464
5.42k
  // Two-way conditional branch.
465
5.42k
  BuildMI(&MBB, DL, get(BccOpc))
466
5.42k
      .addMBB(TBB)
467
5.42k
      .addImm(Cond[0].getImm())
468
5.42k
      .add(Cond[1]);
469
5.42k
  if (isThumb)
470
5.35k
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471
5.42k
  else
472
62
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473
177k
  return 2;
474
177k
}
475
476
bool ARMBaseInstrInfo::
477
159k
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478
159k
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479
159k
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480
159k
  return false;
481
159k
}
482
483
4.87M
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484
4.87M
  if (
MI.isBundle()4.87M
) {
485
13.1k
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486
13.1k
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487
26.3k
    while (
++I != E && 26.3k
I->isInsideBundle()26.3k
) {
488
26.3k
      int PIdx = I->findFirstPredOperandIdx();
489
26.3k
      if (
PIdx != -1 && 26.3k
I->getOperand(PIdx).getImm() != ARMCC::AL13.2k
)
490
13.1k
        return true;
491
26.3k
    }
492
28
    return false;
493
4.85M
  }
494
4.85M
495
4.85M
  int PIdx = MI.findFirstPredOperandIdx();
496
4.63M
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497
4.87M
}
498
499
bool ARMBaseInstrInfo::PredicateInstruction(
500
5.20k
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501
5.20k
  unsigned Opc = MI.getOpcode();
502
5.20k
  if (
isUncondBranchOpcode(Opc)5.20k
) {
503
0
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504
0
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505
0
      .addImm(Pred[0].getImm())
506
0
      .addReg(Pred[1].getReg());
507
0
    return true;
508
0
  }
509
5.20k
510
5.20k
  int PIdx = MI.findFirstPredOperandIdx();
511
5.20k
  if (
PIdx != -15.20k
) {
512
5.20k
    MachineOperand &PMO = MI.getOperand(PIdx);
513
5.20k
    PMO.setImm(Pred[0].getImm());
514
5.20k
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515
5.20k
    return true;
516
5.20k
  }
517
0
  return false;
518
0
}
519
520
bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521
1.20k
                                         ArrayRef<MachineOperand> Pred2) const {
522
1.20k
  if (
Pred1.size() > 2 || 1.20k
Pred2.size() > 21.20k
)
523
0
    return false;
524
1.20k
525
1.20k
  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526
1.20k
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527
1.20k
  if (CC1 == CC2)
528
556
    return true;
529
646
530
646
  switch (CC1) {
531
414
  default:
532
414
    return false;
533
0
  case ARMCC::AL:
534
0
    return true;
535
147
  case ARMCC::HS:
536
147
    return CC2 == ARMCC::HI;
537
76
  case ARMCC::LS:
538
70
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539
7
  case ARMCC::GE:
540
7
    return CC2 == ARMCC::GT;
541
2
  case ARMCC::LE:
542
2
    return CC2 == ARMCC::LT;
543
0
  }
544
0
}
545
546
bool ARMBaseInstrInfo::DefinesPredicate(
547
147k
    MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548
147k
  bool Found = false;
549
899k
  for (unsigned i = 0, e = MI.getNumOperands(); 
i != e899k
;
++i751k
) {
550
751k
    const MachineOperand &MO = MI.getOperand(i);
551
751k
    if (
(MO.isRegMask() && 751k
MO.clobbersPhysReg(ARM::CPSR)5.07k
) ||
552
751k
        
(MO.isReg() && 746k
MO.isDef()508k
&&
MO.getReg() == ARM::CPSR149k
)) {
553
25.3k
      Pred.push_back(MO);
554
25.3k
      Found = true;
555
25.3k
    }
556
751k
  }
557
147k
558
147k
  return Found;
559
147k
}
560
561
39
bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562
39
  for (const auto &MO : MI.operands())
563
210
    
if (210
MO.isReg() && 210
MO.getReg() == ARM::CPSR153
&&
MO.isDef()41
&&
!MO.isDead()39
)
564
5
      return true;
565
34
  return false;
566
34
}
567
568
bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
569
0
                                        unsigned Op) const {
570
0
  const MachineOperand &Offset = MI.getOperand(Op + 1);
571
0
  return Offset.getReg() != 0;
572
0
}
573
574
// Load with negative register offset requires additional 1cyc and +I unit
575
// for Cortex A57
576
bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
577
0
                                             unsigned Op) const {
578
0
  const MachineOperand &Offset = MI.getOperand(Op + 1);
579
0
  const MachineOperand &Opc = MI.getOperand(Op + 2);
580
0
  assert(Opc.isImm());
581
0
  assert(Offset.isReg());
582
0
  int64_t OpcImm = Opc.getImm();
583
0
584
0
  bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585
0
  return (isSub && Offset.getReg() != 0);
586
0
}
587
588
bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
589
0
                                       unsigned Op) const {
590
0
  const MachineOperand &Opc = MI.getOperand(Op + 2);
591
0
  unsigned OffImm = Opc.getImm();
592
0
  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593
0
}
594
595
// Load, scaled register offset, not plus LSL2
596
bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
597
3
                                                  unsigned Op) const {
598
3
  const MachineOperand &Opc = MI.getOperand(Op + 2);
599
3
  unsigned OffImm = Opc.getImm();
600
3
601
3
  bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602
3
  unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603
3
  ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604
3
  if (
ShiftOpc == ARM_AM::no_shift3
)
return false0
; // not scaled
605
3
  
bool SimpleScaled = (isAdd && 3
ShiftOpc == ARM_AM::lsl3
&&
Amt == 23
);
606
3
  return !SimpleScaled;
607
3
}
608
609
// Minus reg for ldstso addr mode
610
bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
611
3
                                        unsigned Op) const {
612
3
  unsigned OffImm = MI.getOperand(Op + 2).getImm();
613
3
  return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614
3
}
615
616
// Load, scaled register offset
617
bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
618
0
                                      unsigned Op) const {
619
0
  unsigned OffImm = MI.getOperand(Op + 2).getImm();
620
0
  return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621
0
}
622
623
135k
static bool isEligibleForITBlock(const MachineInstr *MI) {
624
135k
  switch (MI->getOpcode()) {
625
135k
  default: return true;
626
39
  case ARM::tADC:   // ADC (register) T1
627
39
  case ARM::tADDi3: // ADD (immediate) T1
628
39
  case ARM::tADDi8: // ADD (immediate) T2
629
39
  case ARM::tADDrr: // ADD (register) T1
630
39
  case ARM::tAND:   // AND (register) T1
631
39
  case ARM::tASRri: // ASR (immediate) T1
632
39
  case ARM::tASRrr: // ASR (register) T1
633
39
  case ARM::tBIC:   // BIC (register) T1
634
39
  case ARM::tEOR:   // EOR (register) T1
635
39
  case ARM::tLSLri: // LSL (immediate) T1
636
39
  case ARM::tLSLrr: // LSL (register) T1
637
39
  case ARM::tLSRri: // LSR (immediate) T1
638
39
  case ARM::tLSRrr: // LSR (register) T1
639
39
  case ARM::tMUL:   // MUL T1
640
39
  case ARM::tMVN:   // MVN (register) T1
641
39
  case ARM::tORR:   // ORR (register) T1
642
39
  case ARM::tROR:   // ROR (register) T1
643
39
  case ARM::tRSB:   // RSB (immediate) T1
644
39
  case ARM::tSBC:   // SBC (register) T1
645
39
  case ARM::tSUBi3: // SUB (immediate) T1
646
39
  case ARM::tSUBi8: // SUB (immediate) T2
647
39
  case ARM::tSUBrr: // SUB (register) T1
648
39
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649
0
  }
650
0
}
651
652
/// isPredicable - Return true if the specified instruction can be predicated.
653
/// By default, this returns true for every instruction with a
654
/// PredicateOperand.
655
147k
bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656
147k
  if (!MI.isPredicable())
657
12.1k
    return false;
658
135k
659
135k
  
if (135k
MI.isBundle()135k
)
660
26
    return false;
661
135k
662
135k
  
if (135k
!isEligibleForITBlock(&MI)135k
)
663
5
    return false;
664
135k
665
135k
  const ARMFunctionInfo *AFI =
666
135k
      MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667
135k
668
135k
  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669
135k
  // In their ARM encoding, they can't be encoded in a conditional form.
670
135k
  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671
2.55k
    return false;
672
132k
673
132k
  
if (132k
AFI->isThumb2Function()132k
) {
674
115k
    if (getSubtarget().restrictIT())
675
700
      return isV8EligibleForIT(&MI);
676
132k
  }
677
132k
678
132k
  return true;
679
132k
}
680
681
namespace llvm {
682
683
64
template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684
414
  for (unsigned i = 0, e = MI->getNumOperands(); 
i != e414
;
++i350
) {
685
350
    const MachineOperand &MO = MI->getOperand(i);
686
350
    if (
!MO.isReg() || 350
MO.isUndef()233
||
MO.isUse()233
)
687
222
      continue;
688
128
    
if (128
MO.getReg() != ARM::CPSR128
)
689
64
      continue;
690
64
    
if (64
!MO.isDead()64
)
691
0
      return false;
692
350
  }
693
64
  // all definitions of CPSR are dead
694
64
  return true;
695
64
}
696
697
} // end namespace llvm
698
699
/// GetInstSize - Return the size of the specified MachineInstr.
700
///
701
1.67M
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702
1.67M
  const MachineBasicBlock &MBB = *MI.getParent();
703
1.67M
  const MachineFunction *MF = MBB.getParent();
704
1.67M
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705
1.67M
706
1.67M
  const MCInstrDesc &MCID = MI.getDesc();
707
1.67M
  if (MCID.getSize())
708
1.21M
    return MCID.getSize();
709
456k
710
456k
  // If this machine instr is an inline asm, measure it.
711
456k
  
if (456k
MI.getOpcode() == ARM::INLINEASM456k
)
712
198k
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713
257k
  unsigned Opc = MI.getOpcode();
714
257k
  switch (Opc) {
715
192k
  default:
716
192k
    // pseudo-instruction sizes are zero.
717
192k
    return 0;
718
0
  case TargetOpcode::BUNDLE:
719
0
    return getInstBundleLength(MI);
720
30.3k
  case ARM::MOVi16_ga_pcrel:
721
30.3k
  case ARM::MOVTi16_ga_pcrel:
722
30.3k
  case ARM::t2MOVi16_ga_pcrel:
723
30.3k
  case ARM::t2MOVTi16_ga_pcrel:
724
30.3k
    return 4;
725
7
  case ARM::MOVi32imm:
726
7
  case ARM::t2MOVi32imm:
727
7
    return 8;
728
35.2k
  case ARM::CONSTPOOL_ENTRY:
729
35.2k
  case ARM::JUMPTABLE_INSTS:
730
35.2k
  case ARM::JUMPTABLE_ADDRS:
731
35.2k
  case ARM::JUMPTABLE_TBB:
732
35.2k
  case ARM::JUMPTABLE_TBH:
733
35.2k
    // If this machine instr is a constant pool entry, its size is recorded as
734
35.2k
    // operand #2.
735
35.2k
    return MI.getOperand(2).getImm();
736
4
  case ARM::Int_eh_sjlj_longjmp:
737
4
    return 16;
738
2
  case ARM::tInt_eh_sjlj_longjmp:
739
2
    return 10;
740
3
  case ARM::tInt_WIN_eh_sjlj_longjmp:
741
3
    return 12;
742
7
  case ARM::Int_eh_sjlj_setjmp:
743
7
  case ARM::Int_eh_sjlj_setjmp_nofp:
744
7
    return 20;
745
13
  case ARM::tInt_eh_sjlj_setjmp:
746
13
  case ARM::t2Int_eh_sjlj_setjmp:
747
13
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
748
13
    return 12;
749
26
  case ARM::SPACE:
750
26
    return MI.getOperand(1).getImm();
751
0
  }
752
0
}
753
754
0
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
755
0
  unsigned Size = 0;
756
0
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
757
0
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
758
0
  while (
++I != E && 0
I->isInsideBundle()0
) {
759
0
    assert(!I->isBundle() && "No nested bundle!");
760
0
    Size += getInstSizeInBytes(*I);
761
0
  }
762
0
  return Size;
763
0
}
764
765
void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
766
                                    MachineBasicBlock::iterator I,
767
                                    unsigned DestReg, bool KillSrc,
768
4
                                    const ARMSubtarget &Subtarget) const {
769
4
  unsigned Opc = Subtarget.isThumb()
770
2
                     ? 
(Subtarget.isMClass() ? 2
ARM::t2MRS_M1
:
ARM::t2MRS_AR1
)
771
2
                     : ARM::MRS;
772
4
773
4
  MachineInstrBuilder MIB =
774
4
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
775
4
776
4
  // There is only 1 A/R class MRS instruction, and it always refers to
777
4
  // APSR. However, there are lots of other possibilities on M-class cores.
778
4
  if (Subtarget.isMClass())
779
1
    MIB.addImm(0x800);
780
4
781
4
  MIB.add(predOps(ARMCC::AL))
782
4
     .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
783
4
}
784
785
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
786
                                  MachineBasicBlock::iterator I,
787
                                  unsigned SrcReg, bool KillSrc,
788
4
                                  const ARMSubtarget &Subtarget) const {
789
4
  unsigned Opc = Subtarget.isThumb()
790
2
                     ? 
(Subtarget.isMClass() ? 2
ARM::t2MSR_M1
:
ARM::t2MSR_AR1
)
791
2
                     : ARM::MSR;
792
4
793
4
  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
794
4
795
4
  if (Subtarget.isMClass())
796
1
    MIB.addImm(0x800);
797
4
  else
798
3
    MIB.addImm(8);
799
4
800
4
  MIB.addReg(SrcReg, getKillRegState(KillSrc))
801
4
     .add(predOps(ARMCC::AL))
802
4
     .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
803
4
}
804
805
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
806
                                   MachineBasicBlock::iterator I,
807
                                   const DebugLoc &DL, unsigned DestReg,
808
8.85k
                                   unsigned SrcReg, bool KillSrc) const {
809
8.85k
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
810
8.85k
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
811
8.85k
812
8.85k
  if (
GPRDest && 8.85k
GPRSrc4.84k
) {
813
4.58k
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
814
4.58k
        .addReg(SrcReg, getKillRegState(KillSrc))
815
4.58k
        .add(predOps(ARMCC::AL))
816
4.58k
        .add(condCodeOp());
817
4.58k
    return;
818
4.58k
  }
819
4.27k
820
4.27k
  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
821
4.27k
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
822
4.27k
823
4.27k
  unsigned Opc = 0;
824
4.27k
  if (
SPRDest && 4.27k
SPRSrc1.86k
)
825
1.64k
    Opc = ARM::VMOVS;
826
2.62k
  else 
if (2.62k
GPRDest && 2.62k
SPRSrc260
)
827
256
    Opc = ARM::VMOVRS;
828
2.36k
  else 
if (2.36k
SPRDest && 2.36k
GPRSrc215
)
829
215
    Opc = ARM::VMOVSR;
830
2.15k
  else 
if (2.15k
ARM::DPRRegClass.contains(DestReg, SrcReg) && 2.15k
!Subtarget.isFPOnlySP()1.68k
)
831
1.63k
    Opc = ARM::VMOVD;
832
516
  else 
if (516
ARM::QPRRegClass.contains(DestReg, SrcReg)516
)
833
450
    Opc = ARM::VORRq;
834
4.27k
835
4.27k
  if (
Opc4.27k
) {
836
4.20k
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
837
4.20k
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
838
4.20k
    if (Opc == ARM::VORRq)
839
450
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
840
4.20k
    MIB.add(predOps(ARMCC::AL));
841
4.20k
    return;
842
4.20k
  }
843
66
844
66
  // Handle register classes that require multiple instructions.
845
66
  unsigned BeginIdx = 0;
846
66
  unsigned SubRegs = 0;
847
66
  int Spacing = 1;
848
66
849
66
  // Use VORRq when possible.
850
66
  if (
ARM::QQPRRegClass.contains(DestReg, SrcReg)66
) {
851
0
    Opc = ARM::VORRq;
852
0
    BeginIdx = ARM::qsub_0;
853
0
    SubRegs = 2;
854
66
  } else 
if (66
ARM::QQQQPRRegClass.contains(DestReg, SrcReg)66
) {
855
5
    Opc = ARM::VORRq;
856
5
    BeginIdx = ARM::qsub_0;
857
5
    SubRegs = 4;
858
5
  // Fall back to VMOVD.
859
66
  } else 
if (61
ARM::DPairRegClass.contains(DestReg, SrcReg)61
) {
860
4
    Opc = ARM::VMOVD;
861
4
    BeginIdx = ARM::dsub_0;
862
4
    SubRegs = 2;
863
61
  } else 
if (57
ARM::DTripleRegClass.contains(DestReg, SrcReg)57
) {
864
0
    Opc = ARM::VMOVD;
865
0
    BeginIdx = ARM::dsub_0;
866
0
    SubRegs = 3;
867
57
  } else 
if (57
ARM::DQuadRegClass.contains(DestReg, SrcReg)57
) {
868
0
    Opc = ARM::VMOVD;
869
0
    BeginIdx = ARM::dsub_0;
870
0
    SubRegs = 4;
871
57
  } else 
if (57
ARM::GPRPairRegClass.contains(DestReg, SrcReg)57
) {
872
2
    Opc = Subtarget.isThumb2() ? 
ARM::tMOVr1
:
ARM::MOVr1
;
873
2
    BeginIdx = ARM::gsub_0;
874
2
    SubRegs = 2;
875
57
  } else 
if (55
ARM::DPairSpcRegClass.contains(DestReg, SrcReg)55
) {
876
0
    Opc = ARM::VMOVD;
877
0
    BeginIdx = ARM::dsub_0;
878
0
    SubRegs = 2;
879
0
    Spacing = 2;
880
55
  } else 
if (55
ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)55
) {
881
0
    Opc = ARM::VMOVD;
882
0
    BeginIdx = ARM::dsub_0;
883
0
    SubRegs = 3;
884
0
    Spacing = 2;
885
55
  } else 
if (55
ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)55
) {
886
0
    Opc = ARM::VMOVD;
887
0
    BeginIdx = ARM::dsub_0;
888
0
    SubRegs = 4;
889
0
    Spacing = 2;
890
55
  } else 
if (55
ARM::DPRRegClass.contains(DestReg, SrcReg) && 55
Subtarget.isFPOnlySP()47
) {
891
47
    Opc = ARM::VMOVS;
892
47
    BeginIdx = ARM::ssub_0;
893
47
    SubRegs = 2;
894
55
  } else 
if (8
SrcReg == ARM::CPSR8
) {
895
4
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
896
4
    return;
897
4
  } else 
if (4
DestReg == ARM::CPSR4
) {
898
4
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
899
4
    return;
900
4
  }
901
58
902
66
  assert(Opc && "Impossible reg-to-reg copy");
903
58
904
58
  const TargetRegisterInfo *TRI = &getRegisterInfo();
905
58
  MachineInstrBuilder Mov;
906
58
907
58
  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
908
58
  if (
TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))58
) {
909
0
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
910
0
    Spacing = -Spacing;
911
0
  }
912
#ifndef NDEBUG
913
  SmallSet<unsigned, 4> DstRegs;
914
#endif
915
184
  for (unsigned i = 0; 
i != SubRegs184
;
++i126
) {
916
126
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
917
126
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
918
126
    assert(Dst && Src && "Bad sub-register");
919
#ifndef NDEBUG
920
    assert(!DstRegs.count(Src) && "destructive vector copy");
921
    DstRegs.insert(Dst);
922
#endif
923
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
924
126
    // VORR takes two source operands.
925
126
    if (Opc == ARM::VORRq)
926
20
      Mov.addReg(Src);
927
126
    Mov = Mov.add(predOps(ARMCC::AL));
928
126
    // MOVr can set CC.
929
126
    if (Opc == ARM::MOVr)
930
2
      Mov = Mov.add(condCodeOp());
931
126
  }
932
58
  // Add implicit super-register defs and kills to the last instruction.
933
58
  Mov->addRegisterDefined(DestReg, TRI);
934
58
  if (KillSrc)
935
21
    Mov->addRegisterKilled(SrcReg, TRI);
936
8.85k
}
937
938
const MachineInstrBuilder &
939
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
940
                          unsigned SubIdx, unsigned State,
941
70
                          const TargetRegisterInfo *TRI) const {
942
70
  if (!SubIdx)
943
0
    return MIB.addReg(Reg, State);
944
70
945
70
  
if (70
TargetRegisterInfo::isPhysicalRegister(Reg)70
)
946
8
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
947
62
  return MIB.addReg(Reg, State, SubIdx);
948
62
}
949
950
void ARMBaseInstrInfo::
951
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
952
                    unsigned SrcReg, bool isKill, int FI,
953
                    const TargetRegisterClass *RC,
954
2.26k
                    const TargetRegisterInfo *TRI) const {
955
2.26k
  DebugLoc DL;
956
2.26k
  if (
I != MBB.end()2.26k
)
DL = I->getDebugLoc()2.23k
;
957
2.26k
  MachineFunction &MF = *MBB.getParent();
958
2.26k
  MachineFrameInfo &MFI = MF.getFrameInfo();
959
2.26k
  unsigned Align = MFI.getObjectAlignment(FI);
960
2.26k
961
2.26k
  MachineMemOperand *MMO = MF.getMachineMemOperand(
962
2.26k
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
963
2.26k
      MFI.getObjectSize(FI), Align);
964
2.26k
965
2.26k
  switch (TRI->getSpillSize(*RC)) {
966
1.67k
    case 4:
967
1.67k
      if (
ARM::GPRRegClass.hasSubClassEq(RC)1.67k
) {
968
1.54k
        BuildMI(MBB, I, DL, get(ARM::STRi12))
969
1.54k
            .addReg(SrcReg, getKillRegState(isKill))
970
1.54k
            .addFrameIndex(FI)
971
1.54k
            .addImm(0)
972
1.54k
            .addMemOperand(MMO)
973
1.54k
            .add(predOps(ARMCC::AL));
974
1.67k
      } else 
if (129
ARM::SPRRegClass.hasSubClassEq(RC)129
) {
975
129
        BuildMI(MBB, I, DL, get(ARM::VSTRS))
976
129
            .addReg(SrcReg, getKillRegState(isKill))
977
129
            .addFrameIndex(FI)
978
129
            .addImm(0)
979
129
            .addMemOperand(MMO)
980
129
            .add(predOps(ARMCC::AL));
981
129
      } else
982
0
        llvm_unreachable("Unknown reg class!");
983
1.67k
      break;
984
269
    case 8:
985
269
      if (
ARM::DPRRegClass.hasSubClassEq(RC)269
) {
986
262
        BuildMI(MBB, I, DL, get(ARM::VSTRD))
987
262
            .addReg(SrcReg, getKillRegState(isKill))
988
262
            .addFrameIndex(FI)
989
262
            .addImm(0)
990
262
            .addMemOperand(MMO)
991
262
            .add(predOps(ARMCC::AL));
992
269
      } else 
if (7
ARM::GPRPairRegClass.hasSubClassEq(RC)7
) {
993
7
        if (
Subtarget.hasV5TEOps()7
) {
994
5
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
995
5
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
996
5
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
997
5
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
998
5
             .add(predOps(ARMCC::AL));
999
7
        } else {
1000
2
          // Fallback to STM instruction, which has existed since the dawn of
1001
2
          // time.
1002
2
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
1003
2
                                        .addFrameIndex(FI)
1004
2
                                        .addMemOperand(MMO)
1005
2
                                        .add(predOps(ARMCC::AL));
1006
2
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1007
2
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1008
2
        }
1009
7
      } else
1010
0
        llvm_unreachable("Unknown reg class!");
1011
269
      break;
1012
315
    case 16:
1013
315
      if (
ARM::DPairRegClass.hasSubClassEq(RC)315
) {
1014
315
        // Use aligned spills if the stack can be realigned.
1015
315
        if (
Align >= 16 && 315
getRegisterInfo().canRealignStack(MF)315
) {
1016
308
          BuildMI(MBB, I, DL, get(ARM::VST1q64))
1017
308
              .addFrameIndex(FI)
1018
308
              .addImm(16)
1019
308
              .addReg(SrcReg, getKillRegState(isKill))
1020
308
              .addMemOperand(MMO)
1021
308
              .add(predOps(ARMCC::AL));
1022
315
        } else {
1023
7
          BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
1024
7
              .addReg(SrcReg, getKillRegState(isKill))
1025
7
              .addFrameIndex(FI)
1026
7
              .addMemOperand(MMO)
1027
7
              .add(predOps(ARMCC::AL));
1028
7
        }
1029
315
      } else
1030
0
        llvm_unreachable("Unknown reg class!");
1031
315
      break;
1032
1
    case 24:
1033
1
      if (
ARM::DTripleRegClass.hasSubClassEq(RC)1
) {
1034
1
        // Use aligned spills if the stack can be realigned.
1035
1
        if (
Align >= 16 && 1
getRegisterInfo().canRealignStack(MF)0
) {
1036
0
          BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
1037
0
              .addFrameIndex(FI)
1038
0
              .addImm(16)
1039
0
              .addReg(SrcReg, getKillRegState(isKill))
1040
0
              .addMemOperand(MMO)
1041
0
              .add(predOps(ARMCC::AL));
1042
1
        } else {
1043
1
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1044
1
                                        .addFrameIndex(FI)
1045
1
                                        .add(predOps(ARMCC::AL))
1046
1
                                        .addMemOperand(MMO);
1047
1
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1048
1
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1049
1
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1050
1
        }
1051
1
      } else
1052
0
        llvm_unreachable("Unknown reg class!");
1053
1
      break;
1054
0
    case 32:
1055
0
      if (
ARM::QQPRRegClass.hasSubClassEq(RC) || 0
ARM::DQuadRegClass.hasSubClassEq(RC)0
) {
1056
0
        if (
Align >= 16 && 0
getRegisterInfo().canRealignStack(MF)0
) {
1057
0
          // FIXME: It's possible to only store part of the QQ register if the
1058
0
          // spilled def has a sub-register index.
1059
0
          BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
1060
0
              .addFrameIndex(FI)
1061
0
              .addImm(16)
1062
0
              .addReg(SrcReg, getKillRegState(isKill))
1063
0
              .addMemOperand(MMO)
1064
0
              .add(predOps(ARMCC::AL));
1065
0
        } else {
1066
0
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1067
0
                                        .addFrameIndex(FI)
1068
0
                                        .add(predOps(ARMCC::AL))
1069
0
                                        .addMemOperand(MMO);
1070
0
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1071
0
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1072
0
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1073
0
                AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1074
0
        }
1075
0
      } else
1076
0
        llvm_unreachable("Unknown reg class!");
1077
0
      break;
1078
2
    case 64:
1079
2
      if (
ARM::QQQQPRRegClass.hasSubClassEq(RC)2
) {
1080
2
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
1081
2
                                      .addFrameIndex(FI)
1082
2
                                      .add(predOps(ARMCC::AL))
1083
2
                                      .addMemOperand(MMO);
1084
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1085
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1086
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1087
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1088
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1089
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1090
2
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1091
2
              AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1092
2
      } else
1093
0
        llvm_unreachable("Unknown reg class!");
1094
2
      break;
1095
0
    default:
1096
0
      llvm_unreachable("Unknown reg class!");
1097
2.26k
  }
1098
2.26k
}
1099
1100
unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1101
83.0k
                                              int &FrameIndex) const {
1102
83.0k
  switch (MI.getOpcode()) {
1103
79.4k
  default: break;
1104
217
  case ARM::STRrs:
1105
217
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1106
217
    if (
MI.getOperand(1).isFI() && 217
MI.getOperand(2).isReg()0
&&
1107
217
        
MI.getOperand(3).isImm()0
&&
MI.getOperand(2).getReg() == 00
&&
1108
217
        
MI.getOperand(3).getImm() == 00
) {
1109
0
      FrameIndex = MI.getOperand(1).getIndex();
1110
0
      return MI.getOperand(0).getReg();
1111
0
    }
1112
217
    break;
1113
3.37k
  case ARM::STRi12:
1114
3.37k
  case ARM::t2STRi12:
1115
3.37k
  case ARM::tSTRspi:
1116
3.37k
  case ARM::VSTRD:
1117
3.37k
  case ARM::VSTRS:
1118
3.37k
    if (
MI.getOperand(1).isFI() && 3.37k
MI.getOperand(2).isImm()1.30k
&&
1119
3.37k
        
MI.getOperand(2).getImm() == 01.30k
) {
1120
1.08k
      FrameIndex = MI.getOperand(1).getIndex();
1121
1.08k
      return MI.getOperand(0).getReg();
1122
1.08k
    }
1123
2.28k
    break;
1124
75
  case ARM::VST1q64:
1125
75
  case ARM::VST1d64TPseudo:
1126
75
  case ARM::VST1d64QPseudo:
1127
75
    if (
MI.getOperand(0).isFI() && 75
MI.getOperand(2).getSubReg() == 00
) {
1128
0
      FrameIndex = MI.getOperand(0).getIndex();
1129
0
      return MI.getOperand(2).getReg();
1130
0
    }
1131
75
    break;
1132
0
  case ARM::VSTMQIA:
1133
0
    if (
MI.getOperand(1).isFI() && 0
MI.getOperand(0).getSubReg() == 00
) {
1134
0
      FrameIndex = MI.getOperand(1).getIndex();
1135
0
      return MI.getOperand(0).getReg();
1136
0
    }
1137
0
    break;
1138
82.0k
  }
1139
82.0k
1140
82.0k
  return 0;
1141
82.0k
}
1142
1143
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1144
116k
                                                    int &FrameIndex) const {
1145
116k
  const MachineMemOperand *Dummy;
1146
13.6k
  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
1147
116k
}
1148
1149
void ARMBaseInstrInfo::
1150
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1151
                     unsigned DestReg, int FI,
1152
                     const TargetRegisterClass *RC,
1153
1.99k
                     const TargetRegisterInfo *TRI) const {
1154
1.99k
  DebugLoc DL;
1155
1.99k
  if (
I != MBB.end()1.99k
)
DL = I->getDebugLoc()1.99k
;
1156
1.99k
  MachineFunction &MF = *MBB.getParent();
1157
1.99k
  MachineFrameInfo &MFI = MF.getFrameInfo();
1158
1.99k
  unsigned Align = MFI.getObjectAlignment(FI);
1159
1.99k
  MachineMemOperand *MMO = MF.getMachineMemOperand(
1160
1.99k
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1161
1.99k
      MFI.getObjectSize(FI), Align);
1162
1.99k
1163
1.99k
  switch (TRI->getSpillSize(*RC)) {
1164
1.20k
  case 4:
1165
1.20k
    if (
ARM::GPRRegClass.hasSubClassEq(RC)1.20k
) {
1166
1.18k
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1167
1.18k
          .addFrameIndex(FI)
1168
1.18k
          .addImm(0)
1169
1.18k
          .addMemOperand(MMO)
1170
1.18k
          .add(predOps(ARMCC::AL));
1171
1.18k
1172
1.20k
    } else 
if (28
ARM::SPRRegClass.hasSubClassEq(RC)28
) {
1173
28
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1174
28
          .addFrameIndex(FI)
1175
28
          .addImm(0)
1176
28
          .addMemOperand(MMO)
1177
28
          .add(predOps(ARMCC::AL));
1178
28
    } else
1179
0
      llvm_unreachable("Unknown reg class!");
1180
1.20k
    break;
1181
479
  case 8:
1182
479
    if (
ARM::DPRRegClass.hasSubClassEq(RC)479
) {
1183
475
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1184
475
          .addFrameIndex(FI)
1185
475
          .addImm(0)
1186
475
          .addMemOperand(MMO)
1187
475
          .add(predOps(ARMCC::AL));
1188
479
    } else 
if (4
ARM::GPRPairRegClass.hasSubClassEq(RC)4
) {
1189
4
      MachineInstrBuilder MIB;
1190
4
1191
4
      if (
Subtarget.hasV5TEOps()4
) {
1192
2
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1193
2
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1194
2
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1195
2
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1196
2
           .add(predOps(ARMCC::AL));
1197
4
      } else {
1198
2
        // Fallback to LDM instruction, which has existed since the dawn of
1199
2
        // time.
1200
2
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1201
2
                  .addFrameIndex(FI)
1202
2
                  .addMemOperand(MMO)
1203
2
                  .add(predOps(ARMCC::AL));
1204
2
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1205
2
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1206
2
      }
1207
4
1208
4
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1209
0
        MIB.addReg(DestReg, RegState::ImplicitDefine);
1210
4
    } else
1211
0
      llvm_unreachable("Unknown reg class!");
1212
479
    break;
1213
300
  case 16:
1214
300
    if (
ARM::DPairRegClass.hasSubClassEq(RC)300
) {
1215
300
      if (
Align >= 16 && 300
getRegisterInfo().canRealignStack(MF)300
) {
1216
298
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1217
298
            .addFrameIndex(FI)
1218
298
            .addImm(16)
1219
298
            .addMemOperand(MMO)
1220
298
            .add(predOps(ARMCC::AL));
1221
300
      } else {
1222
2
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1223
2
            .addFrameIndex(FI)
1224
2
            .addMemOperand(MMO)
1225
2
            .add(predOps(ARMCC::AL));
1226
2
      }
1227
300
    } else
1228
0
      llvm_unreachable("Unknown reg class!");
1229
300
    break;
1230
1
  case 24:
1231
1
    if (
ARM::DTripleRegClass.hasSubClassEq(RC)1
) {
1232
1
      if (
Align >= 16 && 1
getRegisterInfo().canRealignStack(MF)0
) {
1233
0
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1234
0
            .addFrameIndex(FI)
1235
0
            .addImm(16)
1236
0
            .addMemOperand(MMO)
1237
0
            .add(predOps(ARMCC::AL));
1238
1
      } else {
1239
1
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1240
1
                                      .addFrameIndex(FI)
1241
1
                                      .addMemOperand(MMO)
1242
1
                                      .add(predOps(ARMCC::AL));
1243
1
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1244
1
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1245
1
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1246
1
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1247
0
          MIB.addReg(DestReg, RegState::ImplicitDefine);
1248
1
      }
1249
1
    } else
1250
0
      llvm_unreachable("Unknown reg class!");
1251
1
    break;
1252
0
   case 32:
1253
0
    if (
ARM::QQPRRegClass.hasSubClassEq(RC) || 0
ARM::DQuadRegClass.hasSubClassEq(RC)0
) {
1254
0
      if (
Align >= 16 && 0
getRegisterInfo().canRealignStack(MF)0
) {
1255
0
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1256
0
            .addFrameIndex(FI)
1257
0
            .addImm(16)
1258
0
            .addMemOperand(MMO)
1259
0
            .add(predOps(ARMCC::AL));
1260
0
      } else {
1261
0
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1262
0
                                      .addFrameIndex(FI)
1263
0
                                      .add(predOps(ARMCC::AL))
1264
0
                                      .addMemOperand(MMO);
1265
0
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1266
0
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1267
0
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1268
0
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1269
0
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1270
0
          MIB.addReg(DestReg, RegState::ImplicitDefine);
1271
0
      }
1272
0
    } else
1273
0
      llvm_unreachable("Unknown reg class!");
1274
0
    break;
1275
2
  case 64:
1276
2
    if (
ARM::QQQQPRRegClass.hasSubClassEq(RC)2
) {
1277
2
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1278
2
                                    .addFrameIndex(FI)
1279
2
                                    .add(predOps(ARMCC::AL))
1280
2
                                    .addMemOperand(MMO);
1281
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1282
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1283
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1284
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1285
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1286
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1287
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1288
2
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1289
2
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1290
0
        MIB.addReg(DestReg, RegState::ImplicitDefine);
1291
2
    } else
1292
0
      llvm_unreachable("Unknown reg class!");
1293
2
    break;
1294
0
  default:
1295
0
    llvm_unreachable("Unknown regclass!");
1296
1.99k
  }
1297
1.99k
}
1298
1299
unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1300
217k
                                               int &FrameIndex) const {
1301
217k
  switch (MI.getOpcode()) {
1302
189k
  default: break;
1303
1.59k
  case ARM::LDRrs:
1304
1.59k
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
1305
1.59k
    if (
MI.getOperand(1).isFI() && 1.59k
MI.getOperand(2).isReg()0
&&
1306
1.59k
        
MI.getOperand(3).isImm()0
&&
MI.getOperand(2).getReg() == 00
&&
1307
1.59k
        
MI.getOperand(3).getImm() == 00
) {
1308
0
      FrameIndex = MI.getOperand(1).getIndex();
1309
0
      return MI.getOperand(0).getReg();
1310
0
    }
1311
1.59k
    break;
1312
26.8k
  case ARM::LDRi12:
1313
26.8k
  case ARM::t2LDRi12:
1314
26.8k
  case ARM::tLDRspi:
1315
26.8k
  case ARM::VLDRD:
1316
26.8k
  case ARM::VLDRS:
1317
26.8k
    if (
MI.getOperand(1).isFI() && 26.8k
MI.getOperand(2).isImm()13.8k
&&
1318
26.8k
        
MI.getOperand(2).getImm() == 013.8k
) {
1319
11.4k
      FrameIndex = MI.getOperand(1).getIndex();
1320
11.4k
      return MI.getOperand(0).getReg();
1321
11.4k
    }
1322
15.3k
    break;
1323
134
  case ARM::VLD1q64:
1324
134
  case ARM::VLD1d64TPseudo:
1325
134
  case ARM::VLD1d64QPseudo:
1326
134
    if (
MI.getOperand(1).isFI() && 134
MI.getOperand(0).getSubReg() == 02
) {
1327
2
      FrameIndex = MI.getOperand(1).getIndex();
1328
2
      return MI.getOperand(0).getReg();
1329
2
    }
1330
132
    break;
1331
0
  case ARM::VLDMQIA:
1332
0
    if (
MI.getOperand(1).isFI() && 0
MI.getOperand(0).getSubReg() == 00
) {
1333
0
      FrameIndex = MI.getOperand(1).getIndex();
1334
0
      return MI.getOperand(0).getReg();
1335
0
    }
1336
0
    break;
1337
206k
  }
1338
206k
1339
206k
  return 0;
1340
206k
}
1341
1342
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1343
119k
                                                     int &FrameIndex) const {
1344
119k
  const MachineMemOperand *Dummy;
1345
18.4k
  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1346
119k
}
1347
1348
/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
1349
/// depending on whether the result is used.
1350
56
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1351
56
  bool isThumb1 = Subtarget.isThumb1Only();
1352
56
  bool isThumb2 = Subtarget.isThumb2();
1353
56
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1354
56
1355
56
  DebugLoc dl = MI->getDebugLoc();
1356
56
  MachineBasicBlock *BB = MI->getParent();
1357
56
1358
56
  MachineInstrBuilder LDM, STM;
1359
56
  if (
isThumb1 || 56
!MI->getOperand(1).isDead()28
) {
1360
7
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1361
34
                                                 : 
isThumb1 ? 34
ARM::tLDMIA_UPD28
1362
6
                                                            : ARM::LDMIA_UPD))
1363
41
              .add(MI->getOperand(1));
1364
56
  } else {
1365
15
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? 
ARM::t2LDMIA7
:
ARM::LDMIA8
));
1366
15
  }
1367
56
1368
56
  if (
isThumb1 || 56
!MI->getOperand(0).isDead()28
) {
1369
7
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1370
34
                                                 : 
isThumb1 ? 34
ARM::tSTMIA_UPD28
1371
6
                                                            : ARM::STMIA_UPD))
1372
41
              .add(MI->getOperand(0));
1373
56
  } else {
1374
15
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? 
ARM::t2STMIA7
:
ARM::STMIA8
));
1375
15
  }
1376
56
1377
56
  LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
1378
56
  STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));
1379
56
1380
56
  // Sort the scratch registers into ascending order.
1381
56
  const TargetRegisterInfo &TRI = getRegisterInfo();
1382
56
  SmallVector<unsigned, 6> ScratchRegs;
1383
285
  for(unsigned I = 5; 
I < MI->getNumOperands()285
;
++I229
)
1384
229
    ScratchRegs.push_back(MI->getOperand(I).getReg());
1385
56
  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
1386
56
            [&TRI](const unsigned &Reg1,
1387
227
                   const unsigned &Reg2) -> bool {
1388
227
              return TRI.getEncodingValue(Reg1) <
1389
227
                     TRI.getEncodingValue(Reg2);
1390
227
            });
1391
56
1392
229
  for (const auto &Reg : ScratchRegs) {
1393
229
    LDM.addReg(Reg, RegState::Define);
1394
229
    STM.addReg(Reg, RegState::Kill);
1395
229
  }
1396
56
1397
56
  BB->erase(MI);
1398
56
}
1399
1400
111k
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1401
111k
  if (
MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD111k
) {
1402
156
    assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1403
156
           "LOAD_STACK_GUARD currently supported only for MachO.");
1404
156
    expandLoadStackGuard(MI);
1405
156
    MI.getParent()->erase(MI);
1406
156
    return true;
1407
156
  }
1408
111k
1409
111k
  
if (111k
MI.getOpcode() == ARM::MEMCPY111k
) {
1410
56
    expandMEMCPY(MI);
1411
56
    return true;
1412
56
  }
1413
111k
1414
111k
  // This hook gets to expand COPY instructions before they become
1415
111k
  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
1416
111k
  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
1417
111k
  // changed into a VORR that can go down the NEON pipeline.
1418
111k
  
if (111k
!MI.isCopy() || 111k
Subtarget.dontWidenVMOVS()34.8k
||
Subtarget.isFPOnlySP()34.8k
)
1419
79.1k
    return false;
1420
32.4k
1421
32.4k
  // Look for a copy between even S-registers.  That is where we keep floats
1422
32.4k
  // when using NEON v2f32 instructions for f32 arithmetic.
1423
32.4k
  unsigned DstRegS = MI.getOperand(0).getReg();
1424
32.4k
  unsigned SrcRegS = MI.getOperand(1).getReg();
1425
32.4k
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1426
30.7k
    return false;
1427
1.69k
1428
1.69k
  const TargetRegisterInfo *TRI = &getRegisterInfo();
1429
1.69k
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1430
1.69k
                                              &ARM::DPRRegClass);
1431
1.69k
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1432
1.69k
                                              &ARM::DPRRegClass);
1433
1.69k
  if (
!DstRegD || 1.69k
!SrcRegD1.42k
)
1434
397
    return false;
1435
1.29k
1436
1.29k
  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
1437
1.29k
  // legal if the COPY already defines the full DstRegD, and it isn't a
1438
1.29k
  // sub-register insertion.
1439
1.29k
  
if (1.29k
!MI.definesRegister(DstRegD, TRI) || 1.29k
MI.readsRegister(DstRegD, TRI)111
)
1440
1.19k
    return false;
1441
105
1442
105
  // A dead copy shouldn't show up here, but reject it just in case.
1443
105
  
if (105
MI.getOperand(0).isDead()105
)
1444
0
    return false;
1445
105
1446
105
  // All clear, widen the COPY.
1447
105
  
DEBUG105
(dbgs() << "widening: " << MI);
1448
105
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1449
105
1450
105
  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
1451
105
  // or some other super-register.
1452
105
  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1453
105
  if (ImpDefIdx != -1)
1454
80
    MI.RemoveOperand(ImpDefIdx);
1455
105
1456
105
  // Change the opcode and operands.
1457
105
  MI.setDesc(get(ARM::VMOVD));
1458
105
  MI.getOperand(0).setReg(DstRegD);
1459
105
  MI.getOperand(1).setReg(SrcRegD);
1460
105
  MIB.add(predOps(ARMCC::AL));
1461
105
1462
105
  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
1463
105
  // register scavenger and machine verifier, so we need to indicate that we
1464
105
  // are reading an undefined value from SrcRegD, but a proper value from
1465
105
  // SrcRegS.
1466
105
  MI.getOperand(1).setIsUndef();
1467
105
  MIB.addReg(SrcRegS, RegState::Implicit);
1468
105
1469
105
  // SrcRegD may actually contain an unrelated value in the ssub_1
1470
105
  // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
1471
105
  if (
MI.getOperand(1).isKill()105
) {
1472
25
    MI.getOperand(1).setIsKill(false);
1473
25
    MI.addRegisterKilled(SrcRegS, TRI, true);
1474
25
  }
1475
105
1476
105
  DEBUG(dbgs() << "replaced by: " << MI);
1477
111k
  return true;
1478
111k
}
1479
1480
/// Create a copy of a const pool value. Update CPI to the new index and return
1481
/// the label UID.
1482
0
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1483
0
  MachineConstantPool *MCP = MF.getConstantPool();
1484
0
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1485
0
1486
0
  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1487
0
  assert(MCPE.isMachineConstantPoolEntry() &&
1488
0
         "Expecting a machine constantpool entry!");
1489
0
  ARMConstantPoolValue *ACPV =
1490
0
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1491
0
1492
0
  unsigned PCLabelId = AFI->createPICLabelUId();
1493
0
  ARMConstantPoolValue *NewCPV = nullptr;
1494
0
1495
0
  // FIXME: The below assumes PIC relocation model and that the function
1496
0
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1497
0
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1498
0
  // instructions, so that's probably OK, but is PIC always correct when
1499
0
  // we get here?
1500
0
  if (ACPV->isGlobalValue())
1501
0
    NewCPV = ARMConstantPoolConstant::Create(
1502
0
        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1503
0
        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1504
0
  else 
if (0
ACPV->isExtSymbol()0
)
1505
0
    NewCPV = ARMConstantPoolSymbol::
1506
0
      Create(MF.getFunction()->getContext(),
1507
0
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1508
0
  else 
if (0
ACPV->isBlockAddress()0
)
1509
0
    NewCPV = ARMConstantPoolConstant::
1510
0
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1511
0
             ARMCP::CPBlockAddress, 4);
1512
0
  else 
if (0
ACPV->isLSDA()0
)
1513
0
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
1514
0
                                             ARMCP::CPLSDA, 4);
1515
0
  else 
if (0
ACPV->isMachineBasicBlock()0
)
1516
0
    NewCPV = ARMConstantPoolMBB::
1517
0
      Create(MF.getFunction()->getContext(),
1518
0
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1519
0
  else
1520
0
    llvm_unreachable("Unexpected ARM constantpool value type!!");
1521
0
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1522
0
  return PCLabelId;
1523
0
}
1524
1525
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1526
                                     MachineBasicBlock::iterator I,
1527
                                     unsigned DestReg, unsigned SubIdx,
1528
                                     const MachineInstr &Orig,
1529
12.8k
                                     const TargetRegisterInfo &TRI) const {
1530
12.8k
  unsigned Opcode = Orig.getOpcode();
1531
12.8k
  switch (Opcode) {
1532
12.8k
  default: {
1533
12.8k
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1534
12.8k
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1535
12.8k
    MBB.insert(I, MI);
1536
12.8k
    break;
1537
12.8k
  }
1538
0
  case ARM::tLDRpci_pic:
1539
0
  case ARM::t2LDRpci_pic: {
1540
0
    MachineFunction &MF = *MBB.getParent();
1541
0
    unsigned CPI = Orig.getOperand(1).getIndex();
1542
0
    unsigned PCLabelId = duplicateCPV(MF, CPI);
1543
0
    MachineInstrBuilder MIB =
1544
0
        BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1545
0
            .addConstantPoolIndex(CPI)
1546
0
            .addImm(PCLabelId);
1547
0
    MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
1548
0
    break;
1549
12.8k
  }
1550
12.8k
  }
1551
12.8k
}
1552
1553
MachineInstr &
1554
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1555
    MachineBasicBlock::iterator InsertBefore,
1556
9.22k
    const MachineInstr &Orig) const {
1557
9.22k
  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1558
9.22k
  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1559
9.31k
  for (;;) {
1560
9.31k
    switch (I->getOpcode()) {
1561
0
    case ARM::tLDRpci_pic:
1562
0
    case ARM::t2LDRpci_pic: {
1563
0
      MachineFunction &MF = *MBB.getParent();
1564
0
      unsigned CPI = I->getOperand(1).getIndex();
1565
0
      unsigned PCLabelId = duplicateCPV(MF, CPI);
1566
0
      I->getOperand(1).setIndex(CPI);
1567
0
      I->getOperand(2).setImm(PCLabelId);
1568
0
      break;
1569
9.31k
    }
1570
9.31k
    }
1571
9.31k
    
if (9.31k
!I->isBundledWithSucc()9.31k
)
1572
9.22k
      break;
1573
90
    ++I;
1574
90
  }
1575
9.22k
  return Cloned;
1576
9.22k
}
1577
1578
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1579
                                        const MachineInstr &MI1,
1580
6.09k
                                        const MachineRegisterInfo *MRI) const {
1581
6.09k
  unsigned Opcode = MI0.getOpcode();
1582
6.09k
  if (Opcode == ARM::t2LDRpci ||
1583
6.09k
      Opcode == ARM::t2LDRpci_pic ||
1584
6.09k
      Opcode == ARM::tLDRpci ||
1585
6.08k
      Opcode == ARM::tLDRpci_pic ||
1586
6.08k
      Opcode == ARM::LDRLIT_ga_pcrel ||
1587
6.08k
      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1588
6.08k
      Opcode == ARM::tLDRLIT_ga_pcrel ||
1589
6.07k
      Opcode == ARM::MOV_ga_pcrel ||
1590
6.07k
      Opcode == ARM::MOV_ga_pcrel_ldr ||
1591
6.09k
      
Opcode == ARM::t2MOV_ga_pcrel6.06k
) {
1592
3.55k
    if (MI1.getOpcode() != Opcode)
1593
0
      return false;
1594
3.55k
    
if (3.55k
MI0.getNumOperands() != MI1.getNumOperands()3.55k
)
1595
0
      return false;
1596
3.55k
1597
3.55k
    const MachineOperand &MO0 = MI0.getOperand(1);
1598
3.55k
    const MachineOperand &MO1 = MI1.getOperand(1);
1599
3.55k
    if (MO0.getOffset() != MO1.getOffset())
1600
0
      return false;
1601
3.55k
1602
3.55k
    
if (3.55k
Opcode == ARM::LDRLIT_ga_pcrel ||
1603
3.55k
        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1604
3.55k
        Opcode == ARM::tLDRLIT_ga_pcrel ||
1605
3.54k
        Opcode == ARM::MOV_ga_pcrel ||
1606
3.54k
        Opcode == ARM::MOV_ga_pcrel_ldr ||
1607
3.54k
        Opcode == ARM::t2MOV_ga_pcrel)
1608
3.55k
      // Ignore the PC labels.
1609
3.55k
      return MO0.getGlobal() == MO1.getGlobal();
1610
7
1611
7
    const MachineFunction *MF = MI0.getParent()->getParent();
1612
7
    const MachineConstantPool *MCP = MF->getConstantPool();
1613
7
    int CPI0 = MO0.getIndex();
1614
7
    int CPI1 = MO1.getIndex();
1615
7
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1616
7
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1617
7
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1618
7
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1619
7
    if (
isARMCP0 && 7
isARMCP11
) {
1620
1
      ARMConstantPoolValue *ACPV0 =
1621
1
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1622
1
      ARMConstantPoolValue *ACPV1 =
1623
1
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1624
1
      return ACPV0->hasSameValue(ACPV1);
1625
6
    } else 
if (6
!isARMCP0 && 6
!isARMCP16
) {
1626
6
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1627
6
    }
1628
0
    return false;
1629
2.53k
  } else 
if (2.53k
Opcode == ARM::PICLDR2.53k
) {
1630
0
    if (MI1.getOpcode() != Opcode)
1631
0
      return false;
1632
0
    
if (0
MI0.getNumOperands() != MI1.getNumOperands()0
)
1633
0
      return false;
1634
0
1635
0
    unsigned Addr0 = MI0.getOperand(1).getReg();
1636
0
    unsigned Addr1 = MI1.getOperand(1).getReg();
1637
0
    if (
Addr0 != Addr10
) {
1638
0
      if (!MRI ||
1639
0
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1640
0
          !TargetRegisterInfo::isVirtualRegister(Addr1))
1641
0
        return false;
1642
0
1643
0
      // This assumes SSA form.
1644
0
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1645
0
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1646
0
      // Check if the loaded value, e.g. a constantpool of a global address, are
1647
0
      // the same.
1648
0
      if (!produceSameValue(*Def0, *Def1, MRI))
1649
0
        return false;
1650
0
    }
1651
0
1652
0
    
for (unsigned i = 3, e = MI0.getNumOperands(); 0
i != e0
;
++i0
) {
1653
0
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
1654
0
      const MachineOperand &MO0 = MI0.getOperand(i);
1655
0
      const MachineOperand &MO1 = MI1.getOperand(i);
1656
0
      if (!MO0.isIdenticalTo(MO1))
1657
0
        return false;
1658
0
    }
1659
0
    return true;
1660
2.53k
  }
1661
2.53k
1662
2.53k
  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1663
2.53k
}
1664
1665
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1666
/// determine if two loads are loading from the same base address. It should
1667
/// only return true if the base pointers are the same and the only differences
1668
/// between the two addresses is the offset. It also returns the offsets by
1669
/// reference.
1670
///
1671
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1672
/// is permanently disabled.
1673
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1674
                                               int64_t &Offset1,
1675
228k
                                               int64_t &Offset2) const {
1676
228k
  // Don't worry about Thumb: just ARM and Thumb2.
1677
228k
  if (
Subtarget.isThumb1Only()228k
)
return false23.4k
;
1678
205k
1679
205k
  
if (205k
!Load1->isMachineOpcode() || 205k
!Load2->isMachineOpcode()205k
)
1680
87.0k
    return false;
1681
118k
1682
118k
  switch (Load1->getMachineOpcode()) {
1683
24.4k
  default:
1684
24.4k
    return false;
1685
93.8k
  case ARM::LDRi12:
1686
93.8k
  case ARM::LDRBi12:
1687
93.8k
  case ARM::LDRD:
1688
93.8k
  case ARM::LDRH:
1689
93.8k
  case ARM::LDRSB:
1690
93.8k
  case ARM::LDRSH:
1691
93.8k
  case ARM::VLDRD:
1692
93.8k
  case ARM::VLDRS:
1693
93.8k
  case ARM::t2LDRi8:
1694
93.8k
  case ARM::t2LDRBi8:
1695
93.8k
  case ARM::t2LDRDi8:
1696
93.8k
  case ARM::t2LDRSHi8:
1697
93.8k
  case ARM::t2LDRi12:
1698
93.8k
  case ARM::t2LDRBi12:
1699
93.8k
  case ARM::t2LDRSHi12:
1700
93.8k
    break;
1701
93.8k
  }
1702
93.8k
1703
93.8k
  switch (Load2->getMachineOpcode()) {
1704
12.2k
  default:
1705
12.2k
    return false;
1706
81.6k
  case ARM::LDRi12:
1707
81.6k
  case ARM::LDRBi12:
1708
81.6k
  case ARM::LDRD:
1709
81.6k
  case ARM::LDRH:
1710
81.6k
  case ARM::LDRSB:
1711
81.6k
  case ARM::LDRSH:
1712
81.6k
  case ARM::VLDRD:
1713
81.6k
  case ARM::VLDRS:
1714
81.6k
  case ARM::t2LDRi8:
1715
81.6k
  case ARM::t2LDRBi8:
1716
81.6k
  case ARM::t2LDRSHi8:
1717
81.6k
  case ARM::t2LDRi12:
1718
81.6k
  case ARM::t2LDRBi12:
1719
81.6k
  case ARM::t2LDRSHi12:
1720
81.6k
    break;
1721
81.6k
  }
1722
81.6k
1723
81.6k
  // Check if base addresses and chain operands match.
1724
81.6k
  
if (81.6k
Load1->getOperand(0) != Load2->getOperand(0) ||
1725
19.0k
      Load1->getOperand(4) != Load2->getOperand(4))
1726
62.5k
    return false;
1727
19.0k
1728
19.0k
  // Index should be Reg0.
1729
19.0k
  
if (19.0k
Load1->getOperand(3) != Load2->getOperand(3)19.0k
)
1730
0
    return false;
1731
19.0k
1732
19.0k
  // Determine the offsets.
1733
19.0k
  
if (19.0k
isa<ConstantSDNode>(Load1->getOperand(1)) &&
1734
19.0k
      
isa<ConstantSDNode>(Load2->getOperand(1))18.8k
) {
1735
18.8k
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1736
18.8k
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1737
18.8k
    return true;
1738
18.8k
  }
1739
132
1740
132
  return false;
1741
132
}
1742
1743
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1744
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1745
/// be scheduled togther. On some targets if two loads are loading from
1746
/// addresses in the same cache line, it's better if they are scheduled
1747
/// together. This function takes two integers that represent the load offsets
1748
/// from the common base address. It returns true if it decides it's desirable
1749
/// to schedule the two loads together. "NumLoads" is the number of loads that
1750
/// have already been scheduled after Load1.
1751
///
1752
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1753
/// is permanently disabled.
1754
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1755
                                               int64_t Offset1, int64_t Offset2,
1756
5.61k
                                               unsigned NumLoads) const {
1757
5.61k
  // Don't worry about Thumb: just ARM and Thumb2.
1758
5.61k
  if (
Subtarget.isThumb1Only()5.61k
)
return false0
;
1759
5.61k
1760
5.61k
  assert(Offset2 > Offset1);
1761
5.61k
1762
5.61k
  if ((Offset2 - Offset1) / 8 > 64)
1763
0
    return false;
1764
5.61k
1765
5.61k
  // Check if the machine opcodes are different. If they are different
1766
5.61k
  // then we consider them to not be of the same base address,
1767
5.61k
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
1768
5.61k
  // In this case, they are considered to be the same because they are different
1769
5.61k
  // encoding forms of the same basic instruction.
1770
5.61k
  
if (5.61k
(Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1771
125
      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1772
3
         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1773
122
        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1774
122
         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1775
122
    return false;  // FIXME: overly conservative?
1776
5.49k
1777
5.49k
  // Four loads in a row should be sufficient.
1778
5.49k
  
if (5.49k
NumLoads >= 35.49k
)
1779
641
    return false;
1780
4.85k
1781
4.85k
  return true;
1782
4.85k
}
1783
1784
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1785
                                            const MachineBasicBlock *MBB,
1786
325k
                                            const MachineFunction &MF) const {
1787
325k
  // Debug info is never a scheduling boundary. It's necessary to be explicit
1788
325k
  // due to the special treatment of IT instructions below, otherwise a
1789
325k
  // dbg_value followed by an IT will result in the IT instruction being
1790
325k
  // considered a scheduling hazard, which is wrong. It should be the actual
1791
325k
  // instruction preceding the dbg_value instruction(s), just like it is
1792
325k
  // when debug info is not present.
1793
325k
  if (MI.isDebugValue())
1794
116
    return false;
1795
325k
1796
325k
  // Terminators and labels can't be scheduled around.
1797
325k
  
if (325k
MI.isTerminator() || 325k
MI.isPosition()279k
)
1798
72.1k
    return true;
1799
253k
1800
253k
  // Treat the start of the IT block as a scheduling boundary, but schedule
1801
253k
  // t2IT along with all instructions following it.
1802
253k
  // FIXME: This is a big hammer. But the alternative is to add all potential
1803
253k
  // true and anti dependencies to IT block instructions as implicit operands
1804
253k
  // to the t2IT instruction. The added compile time and complexity does not
1805
253k
  // seem worth it.
1806
253k
  MachineBasicBlock::const_iterator I = MI;
1807
253k
  // Make sure to skip any dbg_value instructions
1808
253k
  while (
++I != MBB->end() && 253k
I->isDebugValue()243k
)
1809
42
    ;
1810
253k
  if (
I != MBB->end() && 253k
I->getOpcode() == ARM::t2IT243k
)
1811
0
    return true;
1812
253k
1813
253k
  // Don't attempt to schedule around any instruction that defines
1814
253k
  // a stack-oriented pointer, as it's unlikely to be profitable. This
1815
253k
  // saves compile time, because it doesn't require every single
1816
253k
  // stack slot reference to depend on the instruction that does the
1817
253k
  // modification.
1818
253k
  // Calls don't actually change the stack pointer, even if they have imp-defs.
1819
253k
  // No ARM calling conventions change the stack pointer. (X86 calling
1820
253k
  // conventions sometimes do).
1821
253k
  
if (253k
!MI.isCall() && 253k
MI.definesRegister(ARM::SP)253k
)
1822
20.2k
    return true;
1823
232k
1824
232k
  return false;
1825
232k
}
1826
1827
bool ARMBaseInstrInfo::
1828
isProfitableToIfCvt(MachineBasicBlock &MBB,
1829
                    unsigned NumCycles, unsigned ExtraPredCycles,
1830
11.0k
                    BranchProbability Probability) const {
1831
11.0k
  if (!NumCycles)
1832
0
    return false;
1833
11.0k
1834
11.0k
  // If we are optimizing for size, see if the branch in the predecessor can be
1835
11.0k
  // lowered to cbn?z by the constant island lowering pass, and return false if
1836
11.0k
  // so. This results in a shorter instruction sequence.
1837
11.0k
  
if (11.0k
MBB.getParent()->getFunction()->optForSize()11.0k
) {
1838
1.92k
    MachineBasicBlock *Pred = *MBB.pred_begin();
1839
1.92k
    if (
!Pred->empty()1.92k
) {
1840
1.92k
      MachineInstr *LastMI = &*Pred->rbegin();
1841
1.92k
      if (
LastMI->getOpcode() == ARM::t2Bcc1.92k
) {
1842
1.90k
        MachineBasicBlock::iterator CmpMI = LastMI;
1843
1.90k
        if (
CmpMI != Pred->begin()1.90k
) {
1844
1.84k
          --CmpMI;
1845
1.84k
          if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1846
1.84k
              
CmpMI->getOpcode() == ARM::t2CMPri1.81k
) {
1847
983
            unsigned Reg = CmpMI->getOperand(0).getReg();
1848
983
            unsigned PredReg = 0;
1849
983
            ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1850
983
            if (
P == ARMCC::AL && 983
CmpMI->getOperand(1).getImm() == 0981
&&
1851
643
                isARMLowRegister(Reg))
1852
567
              return false;
1853
10.4k
          }
1854
1.84k
        }
1855
1.90k
      }
1856
1.92k
    }
1857
1.92k
  }
1858
10.4k
  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1859
10.4k
                             MBB, 0, 0, Probability);
1860
10.4k
}
1861
1862
bool ARMBaseInstrInfo::
1863
isProfitableToIfCvt(MachineBasicBlock &TBB,
1864
                    unsigned TCycles, unsigned TExtra,
1865
                    MachineBasicBlock &FBB,
1866
                    unsigned FCycles, unsigned FExtra,
1867
10.8k
                    BranchProbability Probability) const {
1868
10.8k
  if (!TCycles)
1869
0
    return false;
1870
10.8k
1871
10.8k
  // Attempt to estimate the relative costs of predication versus branching.
1872
10.8k
  // Here we scale up each component of UnpredCost to avoid precision issue when
1873
10.8k
  // scaling TCycles/FCycles by Probability.
1874
10.8k
  const unsigned ScalingUpFactor = 1024;
1875
10.8k
1876
10.8k
  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1877
10.8k
  unsigned UnpredCost;
1878
10.8k
  if (
!Subtarget.hasBranchPredictor()10.8k
) {
1879
822
    // When we don't have a branch predictor it's always cheaper to not take a
1880
822
    // branch than take it, so we have to take that into account.
1881
822
    unsigned NotTakenBranchCost = 1;
1882
822
    unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1883
822
    unsigned TUnpredCycles, FUnpredCycles;
1884
822
    if (
!FCycles822
) {
1885
767
      // Triangle: TBB is the fallthrough
1886
767
      TUnpredCycles = TCycles + NotTakenBranchCost;
1887
767
      FUnpredCycles = TakenBranchCost;
1888
822
    } else {
1889
55
      // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1890
55
      TUnpredCycles = TCycles + TakenBranchCost;
1891
55
      FUnpredCycles = FCycles + NotTakenBranchCost;
1892
55
      // The branch at the end of FBB will disappear when it's predicated, so
1893
55
      // discount it from PredCost.
1894
55
      PredCost -= 1 * ScalingUpFactor;
1895
55
    }
1896
822
    // The total cost is the cost of each path scaled by their probabilites
1897
822
    unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1898
822
    unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1899
822
    UnpredCost = TUnpredCost + FUnpredCost;
1900
822
    // When predicating assume that the first IT can be folded away but later
1901
822
    // ones cost one cycle each
1902
822
    if (
Subtarget.isThumb2() && 822
TCycles + FCycles > 4822
) {
1903
168
      PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1904
168
    }
1905
10.8k
  } else {
1906
10.0k
    unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1907
10.0k
    unsigned FUnpredCost =
1908
10.0k
      Probability.getCompl().scale(FCycles * ScalingUpFactor);
1909
10.0k
    UnpredCost = TUnpredCost + FUnpredCost;
1910
10.0k
    UnpredCost += 1 * ScalingUpFactor; // The branch itself
1911
10.0k
    UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1912
10.0k
  }
1913
10.8k
1914
10.8k
  return PredCost <= UnpredCost;
1915
10.8k
}
1916
1917
bool
1918
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1919
150
                                            MachineBasicBlock &FMBB) const {
1920
150
  // Reduce false anti-dependencies to let the target's out-of-order execution
1921
150
  // engine do its thing.
1922
150
  return Subtarget.isProfitableToUnpredicate();
1923
150
}
1924
1925
/// getInstrPredicate - If instruction is predicated, returns its predicate
1926
/// condition, otherwise returns AL. It also returns the condition code
1927
/// register by reference.
1928
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1929
457k
                                         unsigned &PredReg) {
1930
457k
  int PIdx = MI.findFirstPredOperandIdx();
1931
457k
  if (
PIdx == -1457k
) {
1932
60.5k
    PredReg = 0;
1933
60.5k
    return ARMCC::AL;
1934
60.5k
  }
1935
396k
1936
396k
  PredReg = MI.getOperand(PIdx+1).getReg();
1937
396k
  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
1938
396k
}
1939
1940
0
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
1941
0
  if (Opc == ARM::B)
1942
0
    return ARM::Bcc;
1943
0
  
if (0
Opc == ARM::tB0
)
1944
0
    return ARM::tBcc;
1945
0
  
if (0
Opc == ARM::t2B0
)
1946
0
    return ARM::t2Bcc;
1947
0
1948
0
  
llvm_unreachable0
("Unknown unconditional branch opcode!");
1949
0
}
1950
1951
MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
1952
                                                       bool NewMI,
1953
                                                       unsigned OpIdx1,
1954
36.3k
                                                       unsigned OpIdx2) const {
1955
36.3k
  switch (MI.getOpcode()) {
1956
4.88k
  case ARM::MOVCCr:
1957
4.88k
  case ARM::t2MOVCCr: {
1958
4.88k
    // MOVCC can be commuted by inverting the condition.
1959
4.88k
    unsigned PredReg = 0;
1960
4.88k
    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1961
4.88k
    // MOVCC AL can't be inverted. Shouldn't happen.
1962
4.88k
    if (
CC == ARMCC::AL || 4.88k
PredReg != ARM::CPSR4.88k
)
1963
0
      return nullptr;
1964
4.88k
    MachineInstr *CommutedMI =
1965
4.88k
        TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1966
4.88k
    if (!CommutedMI)
1967
0
      return nullptr;
1968
4.88k
    // After swapping the MOVCC operands, also invert the condition.
1969
4.88k
    CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
1970
4.88k
        .setImm(ARMCC::getOppositeCondition(CC));
1971
4.88k
    return CommutedMI;
1972
4.88k
  }
1973
31.4k
  }
1974
31.4k
  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1975
31.4k
}
1976
1977
/// Identify instructions that can be folded into a MOVCC instruction, and
1978
/// return the defining instruction.
1979
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
1980
                                      const MachineRegisterInfo &MRI,
1981
3.56k
                                      const TargetInstrInfo *TII) {
1982
3.56k
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1983
0
    return nullptr;
1984
3.56k
  
if (3.56k
!MRI.hasOneNonDBGUse(Reg)3.56k
)
1985
1.75k
    return nullptr;
1986
1.80k
  MachineInstr *MI = MRI.getVRegDef(Reg);
1987
1.80k
  if (!MI)
1988
0
    return nullptr;
1989
1.80k
  // MI is folded into the MOVCC by predicating it.
1990
1.80k
  
if (1.80k
!MI->isPredicable()1.80k
)
1991
245
    return nullptr;
1992
1.56k
  // Check if MI has any non-dead defs or physreg uses. This also detects
1993
1.56k
  // predicated instructions which will be reading CPSR.
1994
8.10k
  
for (unsigned i = 1, e = MI->getNumOperands(); 1.56k
i != e8.10k
;
++i6.54k
) {
1995
6.85k
    const MachineOperand &MO = MI->getOperand(i);
1996
6.85k
    // Reject frame index operands, PEI can't handle the predicated pseudos.
1997
6.85k
    if (
MO.isFI() || 6.85k
MO.isCPI()6.83k
||
MO.isJTI()6.80k
)
1998
51
      return nullptr;
1999
6.80k
    
if (6.80k
!MO.isReg()6.80k
)
2000
2.05k
      continue;
2001
4.74k
    // MI can't have any tied operands, that would conflict with predication.
2002
4.74k
    
if (4.74k
MO.isTied()4.74k
)
2003
91
      return nullptr;
2004
4.65k
    
if (4.65k
TargetRegisterInfo::isPhysicalRegister(MO.getReg())4.65k
)
2005
149
      return nullptr;
2006
4.50k
    
if (4.50k
MO.isDef() && 4.50k
!MO.isDead()24
)
2007
24
      return nullptr;
2008
6.85k
  }
2009
1.24k
  bool DontMoveAcrossStores = true;
2010
1.24k
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2011
18
    return nullptr;
2012
1.23k
  return MI;
2013
1.23k
}
2014
2015
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2016
                                     SmallVectorImpl<MachineOperand> &Cond,
2017
                                     unsigned &TrueOp, unsigned &FalseOp,
2018
2.27k
                                     bool &Optimizable) const {
2019
2.27k
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2020
2.27k
         "Unknown select instruction");
2021
2.27k
  // MOVCC operands:
2022
2.27k
  // 0: Def.
2023
2.27k
  // 1: True use.
2024
2.27k
  // 2: False use.
2025
2.27k
  // 3: Condition code.
2026
2.27k
  // 4: CPSR use.
2027
2.27k
  TrueOp = 1;
2028
2.27k
  FalseOp = 2;
2029
2.27k
  Cond.push_back(MI.getOperand(3));
2030
2.27k
  Cond.push_back(MI.getOperand(4));
2031
2.27k
  // We can always fold a def.
2032
2.27k
  Optimizable = true;
2033
2.27k
  return false;
2034
2.27k
}
2035
2036
MachineInstr *
2037
ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2038
                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2039
2.27k
                                 bool PreferFalse) const {
2040
2.27k
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2041
2.27k
         "Unknown select instruction");
2042
2.27k
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2043
2.27k
  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2044
2.27k
  bool Invert = !DefMI;
2045
2.27k
  if (!DefMI)
2046
1.29k
    DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2047
2.27k
  if (!DefMI)
2048
1.03k
    return nullptr;
2049
1.23k
2050
1.23k
  // Find new register class to use.
2051
1.23k
  
MachineOperand FalseReg = MI.getOperand(Invert ? 1.23k
2253
:
1978
);
2052
1.23k
  unsigned DestReg = MI.getOperand(0).getReg();
2053
1.23k
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2054
1.23k
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
2055
0
    return nullptr;
2056
1.23k
2057
1.23k
  // Create a new predicated version of DefMI.
2058
1.23k
  // Rfalse is the first use.
2059
1.23k
  MachineInstrBuilder NewMI =
2060
1.23k
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2061
1.23k
2062
1.23k
  // Copy all the DefMI operands, excluding its (null) predicate.
2063
1.23k
  const MCInstrDesc &DefDesc = DefMI->getDesc();
2064
1.23k
  for (unsigned i = 1, e = DefDesc.getNumOperands();
2065
3.45k
       
i != e && 3.45k
!DefDesc.OpInfo[i].isPredicate()3.45k
;
++i2.22k
)
2066
2.22k
    NewMI.add(DefMI->getOperand(i));
2067
1.23k
2068
1.23k
  unsigned CondCode = MI.getOperand(3).getImm();
2069
1.23k
  if (Invert)
2070
253
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2071
1.23k
  else
2072
978
    NewMI.addImm(CondCode);
2073
1.23k
  NewMI.add(MI.getOperand(4));
2074
1.23k
2075
1.23k
  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2076
1.23k
  if (NewMI->hasOptionalDef())
2077
1.11k
    NewMI.add(condCodeOp());
2078
1.23k
2079
1.23k
  // The output register value when the predicate is false is an implicit
2080
1.23k
  // register operand tied to the first def.
2081
1.23k
  // The tie makes the register allocator ensure the FalseReg is allocated the
2082
1.23k
  // same register as operand 0.
2083
1.23k
  FalseReg.setImplicit();
2084
1.23k
  NewMI.add(FalseReg);
2085
1.23k
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2086
1.23k
2087
1.23k
  // Update SeenMIs set: register newly created MI and erase removed DefMI.
2088
1.23k
  SeenMIs.insert(NewMI);
2089
1.23k
  SeenMIs.erase(DefMI);
2090
1.23k
2091
1.23k
  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2092
1.23k
  // DefMI would be invalid when tranferred inside the loop.  Checking for a
2093
1.23k
  // loop is expensive, but at least remove kill flags if they are in different
2094
1.23k
  // BBs.
2095
1.23k
  if (DefMI->getParent() != MI.getParent())
2096
51
    NewMI->clearKillInfo();
2097
2.27k
2098
2.27k
  // The caller will erase MI, but not DefMI.
2099
2.27k
  DefMI->eraseFromParent();
2100
2.27k
  return NewMI;
2101
2.27k
}
2102
2103
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2104
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2105
/// def operand.
2106
///
2107
/// This will go away once we can teach tblgen how to set the optional CPSR def
2108
/// operand itself.
2109
struct AddSubFlagsOpcodePair {
2110
  uint16_t PseudoOpc;
2111
  uint16_t MachineOpc;
2112
};
2113
2114
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2115
  {ARM::ADDSri, ARM::ADDri},
2116
  {ARM::ADDSrr, ARM::ADDrr},
2117
  {ARM::ADDSrsi, ARM::ADDrsi},
2118
  {ARM::ADDSrsr, ARM::ADDrsr},
2119
2120
  {ARM::SUBSri, ARM::SUBri},
2121
  {ARM::SUBSrr, ARM::SUBrr},
2122
  {ARM::SUBSrsi, ARM::SUBrsi},
2123
  {ARM::SUBSrsr, ARM::SUBrsr},
2124
2125
  {ARM::RSBSri, ARM::RSBri},
2126
  {ARM::RSBSrsi, ARM::RSBrsi},
2127
  {ARM::RSBSrsr, ARM::RSBrsr},
2128
2129
  {ARM::tADDSi3, ARM::tADDi3},
2130
  {ARM::tADDSi8, ARM::tADDi8},
2131
  {ARM::tADDSrr, ARM::tADDrr},
2132
  {ARM::tADCS, ARM::tADC},
2133
2134
  {ARM::tSUBSi3, ARM::tSUBi3},
2135
  {ARM::tSUBSi8, ARM::tSUBi8},
2136
  {ARM::tSUBSrr, ARM::tSUBrr},
2137
  {ARM::tSBCS, ARM::tSBC},
2138
2139
  {ARM::t2ADDSri, ARM::t2ADDri},
2140
  {ARM::t2ADDSrr, ARM::t2ADDrr},
2141
  {ARM::t2ADDSrs, ARM::t2ADDrs},
2142
2143
  {ARM::t2SUBSri, ARM::t2SUBri},
2144
  {ARM::t2SUBSrr, ARM::t2SUBrr},
2145
  {ARM::t2SUBSrs, ARM::t2SUBrs},
2146
2147
  {ARM::t2RSBSri, ARM::t2RSBri},
2148
  {ARM::t2RSBSrs, ARM::t2RSBrs},
2149
};
2150
2151
1.20M
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2152
33.8M
  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); 
i != e33.8M
;
++i32.6M
)
2153
32.6M
    
if (32.6M
OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc32.6M
)
2154
2.82k
      return AddSubFlagsOpcodeMap[i].MachineOpc;
2155
1.20M
  return 0;
2156
1.20M
}
2157
2158
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2159
                                   MachineBasicBlock::iterator &MBBI,
2160
                                   const DebugLoc &dl, unsigned DestReg,
2161
                                   unsigned BaseReg, int NumBytes,
2162
                                   ARMCC::CondCodes Pred, unsigned PredReg,
2163
                                   const ARMBaseInstrInfo &TII,
2164
2.80k
                                   unsigned MIFlags) {
2165
2.80k
  if (
NumBytes == 0 && 2.80k
DestReg != BaseReg301
) {
2166
301
    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2167
301
        .addReg(BaseReg, RegState::Kill)
2168
301
        .add(predOps(Pred, PredReg))
2169
301
        .add(condCodeOp())
2170
301
        .setMIFlags(MIFlags);
2171
301
    return;
2172
301
  }
2173
2.50k
2174
2.50k
  bool isSub = NumBytes < 0;
2175
2.50k
  if (
isSub2.50k
)
NumBytes = -NumBytes1.15k
;
2176
2.50k
2177
5.06k
  while (
NumBytes5.06k
) {
2178
2.55k
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2179
2.55k
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2180
2.55k
    assert(ThisVal && "Didn't extract field correctly");
2181
2.55k
2182
2.55k
    // We will handle these bits from offset, clear them.
2183
2.55k
    NumBytes &= ~ThisVal;
2184
2.55k
2185
2.55k
    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2186
2.55k
2187
2.55k
    // Build the new ADD / SUB.
2188
2.55k
    unsigned Opc = isSub ? 
ARM::SUBri1.19k
:
ARM::ADDri1.36k
;
2189
2.55k
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2190
2.55k
        .addReg(BaseReg, RegState::Kill)
2191
2.55k
        .addImm(ThisVal)
2192
2.55k
        .add(predOps(Pred, PredReg))
2193
2.55k
        .add(condCodeOp())
2194
2.55k
        .setMIFlags(MIFlags);
2195
2.55k
    BaseReg = DestReg;
2196
2.55k
  }
2197
2.80k
}
2198
2199
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2200
                                      MachineFunction &MF, MachineInstr *MI,
2201
6.16k
                                      unsigned NumBytes) {
2202
6.16k
  // This optimisation potentially adds lots of load and store
2203
6.16k
  // micro-operations, it's only really a great benefit to code-size.
2204
6.16k
  if (!MF.getFunction()->optForMinSize())
2205
5.52k
    return false;
2206
635
2207
635
  // If only one register is pushed/popped, LLVM can use an LDR/STR
2208
635
  // instead. We can't modify those so make sure we're dealing with an
2209
635
  // instruction we understand.
2210
635
  bool IsPop = isPopOpcode(MI->getOpcode());
2211
635
  bool IsPush = isPushOpcode(MI->getOpcode());
2212
635
  if (
!IsPush && 635
!IsPop316
)
2213
4
    return false;
2214
631
2215
631
  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2216
617
                      MI->getOpcode() == ARM::VLDMDIA_UPD;
2217
631
  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2218
463
                     MI->getOpcode() == ARM::tPOP ||
2219
461
                     MI->getOpcode() == ARM::tPOP_RET;
2220
631
2221
631
  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2222
631
                          MI->getOperand(1).getReg() == ARM::SP)) &&
2223
631
         "trying to fold sp update into non-sp-updating push/pop");
2224
631
2225
631
  // The VFP push & pop act on D-registers, so we can only fold an adjustment
2226
631
  // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
2227
631
  // if this is violated.
2228
631
  if (
NumBytes % (IsVFPPushPop ? 631
828
:
4603
) != 0)
2229
2
    return false;
2230
629
2231
629
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2232
629
  // pred) so the list starts at 4. Thumb1 starts after the predicate.
2233
629
  
int RegListIdx = IsT1PushPop ? 629
2337
:
4292
;
2234
629
2235
629
  // Calculate the space we'll need in terms of registers.
2236
629
  unsigned RegsNeeded;
2237
629
  const TargetRegisterClass *RegClass;
2238
629
  if (
IsVFPPushPop629
) {
2239
26
    RegsNeeded = NumBytes / 8;
2240
26
    RegClass = &ARM::DPRRegClass;
2241
629
  } else {
2242
603
    RegsNeeded = NumBytes / 4;
2243
603
    RegClass = &ARM::GPRRegClass;
2244
603
  }
2245
629
2246
629
  // We're going to have to strip all list operands off before
2247
629
  // re-adding them since the order matters, so save the existing ones
2248
629
  // for later.
2249
629
  SmallVector<MachineOperand, 4> RegList;
2250
629
2251
629
  // We're also going to need the first register transferred by this
2252
629
  // instruction, which won't necessarily be the first register in the list.
2253
629
  unsigned FirstRegEnc = -1;
2254
629
2255
629
  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2256
4.31k
  for (int i = MI->getNumOperands() - 1; 
i >= RegListIdx4.31k
;
--i3.68k
) {
2257
3.68k
    MachineOperand &MO = MI->getOperand(i);
2258
3.68k
    RegList.push_back(MO);
2259
3.68k
2260
3.68k
    if (
MO.isReg() && 3.68k
TRI->getEncodingValue(MO.getReg()) < FirstRegEnc3.68k
)
2261
2.04k
      FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2262
3.68k
  }
2263
629
2264
629
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2265
629
2266
629
  // Now try to find enough space in the reglist to allocate NumBytes.
2267
1.83k
  for (int CurRegEnc = FirstRegEnc - 1; 
CurRegEnc >= 0 && 1.83k
RegsNeeded1.43k
;
2268
1.20k
       
--CurRegEnc1.20k
) {
2269
1.21k
    unsigned CurReg = RegClass->getRegister(CurRegEnc);
2270
1.21k
    if (
!IsPop1.21k
) {
2271
782
      // Pushing any register is completely harmless, mark the
2272
782
      // register involved as undef since we don't care about it in
2273
782
      // the slightest.
2274
782
      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2275
782
                                                  false, false, true));
2276
782
      --RegsNeeded;
2277
782
      continue;
2278
782
    }
2279
431
2280
431
    // However, we can only pop an extra register if it's not live. For
2281
431
    // registers live within the function we might clobber a return value
2282
431
    // register; the other way a register can be live here is if it's
2283
431
    // callee-saved.
2284
431
    
if (431
isCalleeSavedRegister(CurReg, CSRegs) ||
2285
306
        MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2286
431
        MachineBasicBlock::LQR_Dead) {
2287
145
      // VFP pops don't allow holes in the register list, so any skip is fatal
2288
145
      // for our transformation. GPR pops do, so we should just keep looking.
2289
145
      if (IsVFPPushPop)
2290
8
        return false;
2291
145
      else
2292
137
        continue;
2293
286
    }
2294
286
2295
286
    // Mark the unimportant registers as <def,dead> in the POP.
2296
286
    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2297
286
                                                true));
2298
286
    --RegsNeeded;
2299
286
  }
2300
629
2301
621
  
if (621
RegsNeeded > 0621
)
2302
262
    return false;
2303
359
2304
359
  // Finally we know we can profitably perform the optimisation so go
2305
359
  // ahead: strip all existing registers off and add them back again
2306
359
  // in the right order.
2307
2.22k
  
for (int i = MI->getNumOperands() - 1; 359
i >= RegListIdx2.22k
;
--i1.86k
)
2308
1.86k
    MI->RemoveOperand(i);
2309
359
2310
359
  // Add the complete list back in.
2311
359
  MachineInstrBuilder MIB(MF, &*MI);
2312
2.65k
  for (int i = RegList.size() - 1; 
i >= 02.65k
;
--i2.29k
)
2313
2.29k
    MIB.add(RegList[i]);
2314
6.16k
2315
6.16k
  return true;
2316
6.16k
}
2317
2318
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2319
                                unsigned FrameReg, int &Offset,
2320
7.78k
                                const ARMBaseInstrInfo &TII) {
2321
7.78k
  unsigned Opcode = MI.getOpcode();
2322
7.78k
  const MCInstrDesc &Desc = MI.getDesc();
2323
7.78k
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2324
7.78k
  bool isSub = false;
2325
7.78k
2326
7.78k
  // Memory operands in inline assembly always use AddrMode2.
2327
7.78k
  if (Opcode == ARM::INLINEASM)
2328
0
    AddrMode = ARMII::AddrMode2;
2329
7.78k
2330
7.78k
  if (
Opcode == ARM::ADDri7.78k
) {
2331
1.22k
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
2332
1.22k
    if (
Offset == 01.22k
) {
2333
158
      // Turn it into a move.
2334
158
      MI.setDesc(TII.get(ARM::MOVr));
2335
158
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2336
158
      MI.RemoveOperand(FrameRegIdx+1);
2337
158
      Offset = 0;
2338
158
      return true;
2339
1.07k
    } else 
if (1.07k
Offset < 01.07k
) {
2340
51
      Offset = -Offset;
2341
51
      isSub = true;
2342
51
      MI.setDesc(TII.get(ARM::SUBri));
2343
51
    }
2344
1.22k
2345
1.22k
    // Common case: small offset, fits into instruction.
2346
1.07k
    
if (1.07k
ARM_AM::getSOImmVal(Offset) != -11.07k
) {
2347
749
      // Replace the FrameIndex with sp / fp
2348
749
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2349
749
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2350
749
      Offset = 0;
2351
749
      return true;
2352
749
    }
2353
322
2354
322
    // Otherwise, pull as much of the immedidate into this ADDri/SUBri
2355
322
    // as possible.
2356
322
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2357
322
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2358
322
2359
322
    // We will handle these bits from offset, clear them.
2360
322
    Offset &= ~ThisImmVal;
2361
322
2362
322
    // Get the properly encoded SOImmVal field.
2363
322
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2364
322
           "Bit extraction didn't work?");
2365
322
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2366
7.78k
 } else {
2367
6.55k
    unsigned ImmIdx = 0;
2368
6.55k
    int InstrOffs = 0;
2369
6.55k
    unsigned NumBits = 0;
2370
6.55k
    unsigned Scale = 1;
2371
6.55k
    switch (AddrMode) {
2372
5.93k
    case ARMII::AddrMode_i12:
2373
5.93k
      ImmIdx = FrameRegIdx + 1;
2374
5.93k
      InstrOffs = MI.getOperand(ImmIdx).getImm();
2375
5.93k
      NumBits = 12;
2376
5.93k
      break;
2377
0
    case ARMII::AddrMode2:
2378
0
      ImmIdx = FrameRegIdx+2;
2379
0
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2380
0
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2381
0
        InstrOffs *= -1;
2382
0
      NumBits = 12;
2383
0
      break;
2384
61
    case ARMII::AddrMode3:
2385
61
      ImmIdx = FrameRegIdx+2;
2386
61
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2387
61
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2388
0
        InstrOffs *= -1;
2389
61
      NumBits = 8;
2390
61
      break;
2391
29
    case ARMII::AddrMode4:
2392
29
    case ARMII::AddrMode6:
2393
29
      // Can't fold any offset even if it's zero.
2394
29
      return false;
2395
530
    case ARMII::AddrMode5:
2396
530
      ImmIdx = FrameRegIdx+1;
2397
530
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2398
530
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2399
0
        InstrOffs *= -1;
2400
530
      NumBits = 8;
2401
530
      Scale = 4;
2402
530
      break;
2403
0
    default:
2404
0
      llvm_unreachable("Unsupported addressing mode!");
2405
6.52k
    }
2406
6.52k
2407
6.52k
    Offset += InstrOffs * Scale;
2408
6.52k
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2409
6.52k
    if (
Offset < 06.52k
) {
2410
218
      Offset = -Offset;
2411
218
      isSub = true;
2412
218
    }
2413
6.52k
2414
6.52k
    // Attempt to fold address comp. if opcode has offset bits
2415
6.52k
    if (
NumBits > 06.52k
) {
2416
6.52k
      // Common case: small offset, fits into instruction.
2417
6.52k
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2418
6.52k
      int ImmedOffset = Offset / Scale;
2419
6.52k
      unsigned Mask = (1 << NumBits) - 1;
2420
6.52k
      if (
(unsigned)Offset <= Mask * Scale6.52k
) {
2421
6.48k
        // Replace the FrameIndex with sp
2422
6.48k
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2423
6.48k
        // FIXME: When addrmode2 goes away, this will simplify (like the
2424
6.48k
        // T2 version), as the LDR.i12 versions don't need the encoding
2425
6.48k
        // tricks for the offset value.
2426
6.48k
        if (
isSub6.48k
) {
2427
218
          if (AddrMode == ARMII::AddrMode_i12)
2428
172
            ImmedOffset = -ImmedOffset;
2429
218
          else
2430
46
            ImmedOffset |= 1 << NumBits;
2431
218
        }
2432
6.48k
        ImmOp.ChangeToImmediate(ImmedOffset);
2433
6.48k
        Offset = 0;
2434
6.48k
        return true;
2435
6.48k
      }
2436
46
2437
46
      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2438
46
      ImmedOffset = ImmedOffset & Mask;
2439
46
      if (
isSub46
) {
2440
0
        if (AddrMode == ARMII::AddrMode_i12)
2441
0
          ImmedOffset = -ImmedOffset;
2442
0
        else
2443
0
          ImmedOffset |= 1 << NumBits;
2444
0
      }
2445
6.52k
      ImmOp.ChangeToImmediate(ImmedOffset);
2446
6.52k
      Offset &= ~(Mask*Scale);
2447
6.52k
    }
2448
6.55k
  }
2449
7.78k
2450
368
  
Offset = (isSub) ? 368
-Offset1
:
Offset367
;
2451
368
  return Offset == 0;
2452
7.78k
}
2453
2454
/// analyzeCompare - For a comparison instruction, return the source registers
2455
/// in SrcReg and SrcReg2 if having two register operands, and the value it
2456
/// compares against in CmpValue. Return true if the comparison instruction
2457
/// can be analyzed.
2458
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2459
                                      unsigned &SrcReg2, int &CmpMask,
2460
24.9k
                                      int &CmpValue) const {
2461
24.9k
  switch (MI.getOpcode()) {
2462
1.17k
  default: break;
2463
19.2k
  case ARM::CMPri:
2464
19.2k
  case ARM::t2CMPri:
2465
19.2k
  case ARM::tCMPi8:
2466
19.2k
    SrcReg = MI.getOperand(0).getReg();
2467
19.2k
    SrcReg2 = 0;
2468
19.2k
    CmpMask = ~0;
2469
19.2k
    CmpValue = MI.getOperand(1).getImm();
2470
19.2k
    return true;
2471
4.23k
  case ARM::CMPrr:
2472
4.23k
  case ARM::t2CMPrr:
2473
4.23k
    SrcReg = MI.getOperand(0).getReg();
2474
4.23k
    SrcReg2 = MI.getOperand(1).getReg();
2475
4.23k
    CmpMask = ~0;
2476
4.23k
    CmpValue = 0;
2477
4.23k
    return true;
2478
227
  case ARM::TSTri:
2479
227
  case ARM::t2TSTri:
2480
227
    SrcReg = MI.getOperand(0).getReg();
2481
227
    SrcReg2 = 0;
2482
227
    CmpMask = MI.getOperand(1).getImm();
2483
227
    CmpValue = 0;
2484
227
    return true;
2485
1.17k
  }
2486
1.17k
2487
1.17k
  return false;
2488
1.17k
}
2489
2490
/// isSuitableForMask - Identify a suitable 'and' instruction that
2491
/// operates on the given source register and applies the same mask
2492
/// as a 'tst' instruction. Provide a limited look-through for copies.
2493
/// When successful, MI will hold the found instruction.
2494
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2495
574
                              int CmpMask, bool CommonUse) {
2496
574
  switch (MI->getOpcode()) {
2497
12
    case ARM::ANDri:
2498
12
    case ARM::t2ANDri:
2499
12
      if (CmpMask != MI->getOperand(2).getImm())
2500
12
        return false;
2501
0
      
if (0
SrcReg == MI->getOperand(CommonUse ? 0
10
:
00
).getReg())
2502
0
        return true;
2503
0
      break;
2504
562
  }
2505
562
2506
562
  return false;
2507
562
}
2508
2509
/// getSwappedCondition - assume the flags are set by MI(a,b), return
2510
/// the condition code if we modify the instructions such that flags are
2511
/// set by MI(b,a).
2512
44
inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2513
44
  switch (CC) {
2514
0
  default: return ARMCC::AL;
2515
0
  case ARMCC::EQ: return ARMCC::EQ;
2516
3
  case ARMCC::NE: return ARMCC::NE;
2517
0
  case ARMCC::HS: return ARMCC::LS;
2518
15
  case ARMCC::LO: return ARMCC::HI;
2519
6
  case ARMCC::HI: return ARMCC::LO;
2520
0
  case ARMCC::LS: return ARMCC::HS;
2521
0
  case ARMCC::GE: return ARMCC::LE;
2522
6
  case ARMCC::LT: return ARMCC::GT;
2523
14
  case ARMCC::GT: return ARMCC::LT;
2524
0
  case ARMCC::LE: return ARMCC::GE;
2525
0
  }
2526
0
}
2527
2528
/// isRedundantFlagInstr - check whether the first instruction, whose only
2529
/// purpose is to update flags, can be made redundant.
2530
/// CMPrr can be made redundant by SUBrr if the operands are the same.
2531
/// CMPri can be made redundant by SUBri if the operands are the same.
2532
/// This function can be extended later on.
2533
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
2534
                                        unsigned SrcReg2, int ImmValue,
2535
12.7k
                                        MachineInstr *OI) {
2536
12.7k
  if ((CmpI->getOpcode() == ARM::CMPrr ||
2537
12.6k
       CmpI->getOpcode() == ARM::t2CMPrr) &&
2538
4.57k
      (OI->getOpcode() == ARM::SUBrr ||
2539
4.57k
       OI->getOpcode() == ARM::t2SUBrr) &&
2540
72
      ((OI->getOperand(1).getReg() == SrcReg &&
2541
15
        OI->getOperand(2).getReg() == SrcReg2) ||
2542
57
       (OI->getOperand(1).getReg() == SrcReg2 &&
2543
57
        OI->getOperand(2).getReg() == SrcReg)))
2544
34
    return true;
2545
12.7k
2546
12.7k
  
if (12.7k
(CmpI->getOpcode() == ARM::CMPri ||
2547
12.5k
       CmpI->getOpcode() == ARM::t2CMPri) &&
2548
8.19k
      (OI->getOpcode() == ARM::SUBri ||
2549
8.19k
       OI->getOpcode() == ARM::t2SUBri) &&
2550
137
      OI->getOperand(1).getReg() == SrcReg &&
2551
45
      OI->getOperand(2).getImm() == ImmValue)
2552
6
    return true;
2553
12.7k
  return false;
2554
12.7k
}
2555
2556
12.2k
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2557
12.2k
  switch (MI->getOpcode()) {
2558
9.01k
  default: return false;
2559
125
  case ARM::tLSLri:
2560
125
  case ARM::tLSRri:
2561
125
  case ARM::tLSLrr:
2562
125
  case ARM::tLSRrr:
2563
125
  case ARM::tSUBrr:
2564
125
  case ARM::tADDrr:
2565
125
  case ARM::tADDi3:
2566
125
  case ARM::tADDi8:
2567
125
  case ARM::tSUBi3:
2568
125
  case ARM::tSUBi8:
2569
125
  case ARM::tMUL:
2570
125
    IsThumb1 = true;
2571
125
    LLVM_FALLTHROUGH;
2572
3.27k
  case ARM::RSBrr:
2573
3.27k
  case ARM::RSBri:
2574
3.27k
  case ARM::RSCrr:
2575
3.27k
  case ARM::RSCri:
2576
3.27k
  case ARM::ADDrr:
2577
3.27k
  case ARM::ADDri:
2578
3.27k
  case ARM::ADCrr:
2579
3.27k
  case ARM::ADCri:
2580
3.27k
  case ARM::SUBrr:
2581
3.27k
  case ARM::SUBri:
2582
3.27k
  case ARM::SBCrr:
2583
3.27k
  case ARM::SBCri:
2584
3.27k
  case ARM::t2RSBri:
2585
3.27k
  case ARM::t2ADDrr:
2586
3.27k
  case ARM::t2ADDri:
2587
3.27k
  case ARM::t2ADCrr:
2588
3.27k
  case ARM::t2ADCri:
2589
3.27k
  case ARM::t2SUBrr:
2590
3.27k
  case ARM::t2SUBri:
2591
3.27k
  case ARM::t2SBCrr:
2592
3.27k
  case ARM::t2SBCri:
2593
3.27k
  case ARM::ANDrr:
2594
3.27k
  case ARM::ANDri:
2595
3.27k
  case ARM::t2ANDrr:
2596
3.27k
  case ARM::t2ANDri:
2597
3.27k
  case ARM::ORRrr:
2598
3.27k
  case ARM::ORRri:
2599
3.27k
  case ARM::t2ORRrr:
2600
3.27k
  case ARM::t2ORRri:
2601
3.27k
  case ARM::EORrr:
2602
3.27k
  case ARM::EORri:
2603
3.27k
  case ARM::t2EORrr:
2604
3.27k
  case ARM::t2EORri:
2605
3.27k
  case ARM::t2LSRri:
2606
3.27k
  case ARM::t2LSRrr:
2607
3.27k
  case ARM::t2LSLri:
2608
3.27k
  case ARM::t2LSLrr:
2609
3.27k
    return true;
2610
0
  }
2611
0
}
2612
2613
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2614
/// comparison into one that sets the zero bit in the flags register;
2615
/// Remove a redundant Compare instruction if an earlier instruction can set the
2616
/// flags in the same way as Compare.
2617
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2618
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2619
/// condition code of instructions which use the flags.
2620
bool ARMBaseInstrInfo::optimizeCompareInstr(
2621
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2622
23.7k
    int CmpValue, const MachineRegisterInfo *MRI) const {
2623
23.7k
  // Get the unique definition of SrcReg.
2624
23.7k
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2625
23.7k
  if (
!MI23.7k
)
return false0
;
2626
23.7k
2627
23.7k
  // Masked compares sometimes use the same register as the corresponding 'and'.
2628
23.7k
  
if (23.7k
CmpMask != ~023.7k
) {
2629
227
    if (
!isSuitableForMask(MI, SrcReg, CmpMask, false) || 227
isPredicated(*MI)0
) {
2630
227
      MI = nullptr;
2631
227
      for (MachineRegisterInfo::use_instr_iterator
2632
227
           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2633
855
           
UI != UE855
;
++UI628
) {
2634
628
        if (UI->getParent() != CmpInstr.getParent())
2635
281
          continue;
2636
347
        MachineInstr *PotentialAND = &*UI;
2637
347
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2638
0
            isPredicated(*PotentialAND))
2639
347
          continue;
2640
0
        MI = PotentialAND;
2641
0
        break;
2642
0
      }
2643
227
      if (
!MI227
)
return false227
;
2644
23.4k
    }
2645
227
  }
2646
23.4k
2647
23.4k
  // Get ready to iterate backward from CmpInstr.
2648
23.4k
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2649
23.4k
                              B = CmpInstr.getParent()->begin();
2650
23.4k
2651
23.4k
  // Early exit if CmpInstr is at the beginning of the BB.
2652
23.4k
  if (
I == B23.4k
)
return false2.61k
;
2653
20.8k
2654
20.8k
  // There are two possible candidates which can be changed to set CPSR:
2655
20.8k
  // One is MI, the other is a SUB instruction.
2656
20.8k
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2657
20.8k
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2658
20.8k
  MachineInstr *Sub = nullptr;
2659
20.8k
  if (SrcReg2 != 0)
2660
20.8k
    // MI is not a candidate for CMPrr.
2661
3.71k
    MI = nullptr;
2662
17.1k
  else 
if (17.1k
MI->getParent() != CmpInstr.getParent() || 17.1k
CmpValue != 015.6k
) {
2663
4.88k
    // Conservatively refuse to convert an instruction which isn't in the same
2664
4.88k
    // BB as the comparison.
2665
4.88k
    // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
2666
4.88k
    // Thus we cannot return here.
2667
4.88k
    if (CmpInstr.getOpcode() == ARM::CMPri ||
2668
4.75k
        CmpInstr.getOpcode() == ARM::t2CMPri)
2669
4.50k
      MI = nullptr;
2670
4.88k
    else
2671
381
      return false;
2672
20.5k
  }
2673
20.5k
2674
20.5k
  bool IsThumb1 = false;
2675
20.5k
  if (
MI && 20.5k
!isOptimizeCompareCandidate(MI, IsThumb1)12.2k
)
2676
9.01k
    return false;
2677
11.4k
2678
11.4k
  // We also want to do this peephole for cases like this: if (a*b == 0),
2679
11.4k
  // and optimise away the CMP instruction from the generated code sequence:
2680
11.4k
  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2681
11.4k
  // resulting from the select instruction, but these MOVS instructions for
2682
11.4k
  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2683
11.4k
  // However, if we only have MOVS instructions in between the CMP and the
2684
11.4k
  // other instruction (the MULS in this example), then the CPSR is dead so we
2685
11.4k
  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2686
11.4k
  // reordering and then continue the analysis hoping we can eliminate the
2687
11.4k
  // CMP. This peephole works on the vregs, so is still in SSA form. As a
2688
11.4k
  // consequence, the movs won't redefine/kill the MUL operands which would
2689
11.4k
  // make this reordering illegal.
2690
11.4k
  
if (11.4k
MI && 11.4k
IsThumb13.27k
) {
2691
125
    --I;
2692
125
    bool CanReorder = true;
2693
125
    const bool HasStmts = I != E;
2694
158
    for (; 
I != E158
;
--I33
) {
2695
82
      if (
I->getOpcode() != ARM::tMOVi882
) {
2696
49
        CanReorder = false;
2697
49
        break;
2698
49
      }
2699
82
    }
2700
125
    if (
HasStmts && 125
CanReorder68
) {
2701
19
      MI = MI->removeFromParent();
2702
19
      E = CmpInstr;
2703
19
      CmpInstr.getParent()->insert(E, MI);
2704
19
    }
2705
125
    I = CmpInstr;
2706
125
    E = MI;
2707
125
  }
2708
11.4k
2709
11.4k
  // Check that CPSR isn't set between the comparison instruction and the one we
2710
11.4k
  // want to change. At the same time, search for Sub.
2711
11.4k
  const TargetRegisterInfo *TRI = &getRegisterInfo();
2712
11.4k
  --I;
2713
22.4k
  for (; 
I != E22.4k
;
--I10.9k
) {
2714
13.6k
    const MachineInstr &Instr = *I;
2715
13.6k
2716
13.6k
    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2717
13.3k
        Instr.readsRegister(ARM::CPSR, TRI))
2718
13.6k
      // This instruction modifies or uses CPSR after the one we want to
2719
13.6k
      // change. We can't do this transformation.
2720
854
      return false;
2721
12.7k
2722
12.7k
    // Check whether CmpInstr can be made redundant by the current instruction.
2723
12.7k
    
if (12.7k
isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)12.7k
) {
2724
40
      Sub = &*I;
2725
40
      break;
2726
40
    }
2727
12.7k
2728
12.7k
    
if (12.7k
I == B12.7k
)
2729
12.7k
      // The 'and' is below the comparison instruction.
2730
1.78k
      return false;
2731
13.6k
  }
2732
11.4k
2733
11.4k
  // Return false if no candidates exist.
2734
8.85k
  
if (8.85k
!MI && 8.85k
!Sub5.95k
)
2735
5.91k
    return false;
2736
2.94k
2737
2.94k
  // The single candidate is called MI.
2738
2.94k
  
if (2.94k
!MI2.94k
)
MI = Sub40
;
2739
2.94k
2740
2.94k
  // We can't use a predicated instruction - it doesn't always write the flags.
2741
2.94k
  if (isPredicated(*MI))
2742
4
    return false;
2743
2.94k
2744
2.94k
  // Scan forward for the use of CPSR
2745
2.94k
  // When checking against MI: if it's a conditional code that requires
2746
2.94k
  // checking of the V bit or C bit, then this is not safe to do.
2747
2.94k
  // It is safe to remove CmpInstr if CPSR is redefined or killed.
2748
2.94k
  // If we are done with the basic block, we need to check whether CPSR is
2749
2.94k
  // live-out.
2750
2.94k
  SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2751
2.94k
      OperandsToUpdate;
2752
2.94k
  bool isSafe = false;
2753
2.94k
  I = CmpInstr;
2754
2.94k
  E = CmpInstr.getParent()->end();
2755
8.14k
  while (
!isSafe && 8.14k
++I != E7.99k
) {
2756
5.69k
    const MachineInstr &Instr = *I;
2757
5.69k
    for (unsigned IO = 0, EO = Instr.getNumOperands();
2758
24.6k
         
!isSafe && 24.6k
IO != EO24.6k
;
++IO18.9k
) {
2759
19.6k
      const MachineOperand &MO = Instr.getOperand(IO);
2760
19.6k
      if (
MO.isRegMask() && 19.6k
MO.clobbersPhysReg(ARM::CPSR)11
) {
2761
11
        isSafe = true;
2762
11
        break;
2763
11
      }
2764
19.6k
      
if (19.6k
!MO.isReg() || 19.6k
MO.getReg() != ARM::CPSR9.86k
)
2765
16.4k
        continue;
2766
3.11k
      
if (3.11k
MO.isDef()3.11k
) {
2767
141
        isSafe = true;
2768
141
        break;
2769
141
      }
2770
2.97k
      // Condition code is after the operand before CPSR except for VSELs.
2771
2.97k
      ARMCC::CondCodes CC;
2772
2.97k
      bool IsInstrVSel = true;
2773
2.97k
      switch (Instr.getOpcode()) {
2774
2.97k
      default:
2775
2.97k
        IsInstrVSel = false;
2776
2.97k
        CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2777
2.97k
        break;
2778
4
      case ARM::VSELEQD:
2779
4
      case ARM::VSELEQS:
2780
4
        CC = ARMCC::EQ;
2781
4
        break;
2782
0
      case ARM::VSELGTD:
2783
0
      case ARM::VSELGTS:
2784
0
        CC = ARMCC::GT;
2785
0
        break;
2786
0
      case ARM::VSELGED:
2787
0
      case ARM::VSELGES:
2788
0
        CC = ARMCC::GE;
2789
0
        break;
2790
0
      case ARM::VSELVSS:
2791
0
      case ARM::VSELVSD:
2792
0
        CC = ARMCC::VS;
2793
0
        break;
2794
2.97k
      }
2795
2.97k
2796
2.97k
      
if (2.97k
Sub2.97k
) {
2797
44
        ARMCC::CondCodes NewCC = getSwappedCondition(CC);
2798
44
        if (NewCC == ARMCC::AL)
2799
0
          return false;
2800
44
        // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2801
44
        // on CMP needs to be updated to be based on SUB.
2802
44
        // Push the condition code operands to OperandsToUpdate.
2803
44
        // If it is safe to remove CmpInstr, the condition code of these
2804
44
        // operands will be modified.
2805
44
        
if (44
SrcReg2 != 0 && 44
Sub->getOperand(1).getReg() == SrcReg238
&&
2806
44
            
Sub->getOperand(2).getReg() == SrcReg21
) {
2807
21
          // VSel doesn't support condition code update.
2808
21
          if (IsInstrVSel)
2809
0
            return false;
2810
21
          OperandsToUpdate.push_back(
2811
21
              std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2812
21
        }
2813
2.97k
      } else {
2814
2.93k
        // No Sub, so this is x = <op> y, z; cmp x, 0.
2815
2.93k
        switch (CC) {
2816
2.44k
        case ARMCC::EQ: // Z
2817
2.44k
        case ARMCC::NE: // Z
2818
2.44k
        case ARMCC::MI: // N
2819
2.44k
        case ARMCC::PL: // N
2820
2.44k
        case ARMCC::AL: // none
2821
2.44k
          // CPSR can be used multiple times, we should continue.
2822
2.44k
          break;
2823
486
        case ARMCC::HS: // C
2824
486
        case ARMCC::LO: // C
2825
486
        case ARMCC::VS: // V
2826
486
        case ARMCC::VC: // V
2827
486
        case ARMCC::HI: // C Z
2828
486
        case ARMCC::LS: // C Z
2829
486
        case ARMCC::GE: // N V
2830
486
        case ARMCC::LT: // N V
2831
486
        case ARMCC::GT: // Z N V
2832
486
        case ARMCC::LE: // Z N V
2833
486
          // The instruction uses the V bit or C bit which is not safe.
2834
486
          return false;
2835
2.93k
        }
2836
2.93k
      }
2837
19.6k
    }
2838
5.69k
  }
2839
2.94k
2840
2.94k
  // If CPSR is not killed nor re-defined, we should check whether it is
2841
2.94k
  // live-out. If it is live-out, do not optimize.
2842
2.45k
  
if (2.45k
!isSafe2.45k
) {
2843
2.30k
    MachineBasicBlock *MBB = CmpInstr.getParent();
2844
2.30k
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2845
6.57k
             SE = MBB->succ_end(); 
SI != SE6.57k
;
++SI4.27k
)
2846
4.27k
      
if (4.27k
(*SI)->isLiveIn(ARM::CPSR)4.27k
)
2847
3
        return false;
2848
2.30k
  }
2849
2.45k
2850
2.45k
  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
2851
2.45k
  // set CPSR so this is represented as an explicit output)
2852
2.45k
  
if (2.45k
!IsThumb12.45k
) {
2853
2.39k
    MI->getOperand(5).setReg(ARM::CPSR);
2854
2.39k
    MI->getOperand(5).setIsDef(true);
2855
2.39k
  }
2856
2.45k
  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2857
2.45k
  CmpInstr.eraseFromParent();
2858
2.45k
2859
2.45k
  // Modify the condition code of operands in OperandsToUpdate.
2860
2.45k
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2861
2.45k
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2862
2.47k
  for (unsigned i = 0, e = OperandsToUpdate.size(); 
i < e2.47k
;
i++18
)
2863
18
    OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2864
2.45k
2865
2.45k
  return true;
2866
23.7k
}
2867
2868
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2869
                                     unsigned Reg,
2870
28.7k
                                     MachineRegisterInfo *MRI) const {
2871
28.7k
  // Fold large immediates into add, sub, or, xor.
2872
28.7k
  unsigned DefOpc = DefMI.getOpcode();
2873
28.7k
  if (
DefOpc != ARM::t2MOVi32imm && 28.7k
DefOpc != ARM::MOVi32imm27.6k
)
2874
27.0k
    return false;
2875
1.62k
  
if (1.62k
!DefMI.getOperand(1).isImm()1.62k
)
2876
1.62k
    // Could be t2MOVi32imm <ga:xx>
2877
692
    return false;
2878
933
2879
933
  
if (933
!MRI->hasOneNonDBGUse(Reg)933
)
2880
376
    return false;
2881
557
2882
557
  const MCInstrDesc &DefMCID = DefMI.getDesc();
2883
557
  if (
DefMCID.hasOptionalDef()557
) {
2884
0
    unsigned NumOps = DefMCID.getNumOperands();
2885
0
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
2886
0
    if (
MO.getReg() == ARM::CPSR && 0
!MO.isDead()0
)
2887
0
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
2888
0
      // to delete DefMI.
2889
0
      return false;
2890
557
  }
2891
557
2892
557
  const MCInstrDesc &UseMCID = UseMI.getDesc();
2893
557
  if (
UseMCID.hasOptionalDef()557
) {
2894
204
    unsigned NumOps = UseMCID.getNumOperands();
2895
204
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
2896
204
      // If the instruction sets the flag, do not attempt this optimization
2897
204
      // since it may change the semantics of the code.
2898
33
      return false;
2899
524
  }
2900
524
2901
524
  unsigned UseOpc = UseMI.getOpcode();
2902
524
  unsigned NewUseOpc = 0;
2903
524
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
2904
524
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2905
524
  bool Commute = false;
2906
524
  switch (UseOpc) {
2907
408
  default: return false;
2908
116
  case ARM::SUBrr:
2909
116
  case ARM::ADDrr:
2910
116
  case ARM::ORRrr:
2911
116
  case ARM::EORrr:
2912
116
  case ARM::t2SUBrr:
2913
116
  case ARM::t2ADDrr:
2914
116
  case ARM::t2ORRrr:
2915
116
  case ARM::t2EORrr: {
2916
116
    Commute = UseMI.getOperand(2).getReg() != Reg;
2917
116
    switch (UseOpc) {
2918
0
    default: break;
2919
5
    case ARM::ADDrr:
2920
5
    case ARM::SUBrr:
2921
5
      if (
UseOpc == ARM::SUBrr && 5
Commute2
)
2922
0
        return false;
2923
5
2924
5
      // ADD/SUB are special because they're essentially the same operation, so
2925
5
      // we can handle a larger range of immediates.
2926
5
      
if (5
ARM_AM::isSOImmTwoPartVal(ImmVal)5
)
2927
3
        
NewUseOpc = UseOpc == ARM::ADDrr ? 3
ARM::ADDri2
:
ARM::SUBri1
;
2928
2
      else 
if (2
ARM_AM::isSOImmTwoPartVal(-ImmVal)2
) {
2929
2
        ImmVal = -ImmVal;
2930
2
        NewUseOpc = UseOpc == ARM::ADDrr ? 
ARM::SUBri1
:
ARM::ADDri1
;
2931
2
      } else
2932
0
        return false;
2933
5
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2934
5
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2935
5
      break;
2936
2
    case ARM::ORRrr:
2937
2
    case ARM::EORrr:
2938
2
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
2939
1
        return false;
2940
1
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2941
1
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2942
1
      switch (UseOpc) {
2943
0
      default: break;
2944
1
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
2945
0
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
2946
1
      }
2947
1
      break;
2948
102
    case ARM::t2ADDrr:
2949
102
    case ARM::t2SUBrr:
2950
102
      if (
UseOpc == ARM::t2SUBrr && 102
Commute3
)
2951
1
        return false;
2952
101
2953
101
      // ADD/SUB are special because they're essentially the same operation, so
2954
101
      // we can handle a larger range of immediates.
2955
101
      
if (101
ARM_AM::isT2SOImmTwoPartVal(ImmVal)101
)
2956
90
        
NewUseOpc = UseOpc == ARM::t2ADDrr ? 90
ARM::t2ADDri89
:
ARM::t2SUBri1
;
2957
11
      else 
if (11
ARM_AM::isT2SOImmTwoPartVal(-ImmVal)11
) {
2958
2
        ImmVal = -ImmVal;
2959
2
        NewUseOpc = UseOpc == ARM::t2ADDrr ? 
ARM::t2SUBri1
:
ARM::t2ADDri1
;
2960
2
      } else
2961
9
        return false;
2962
92
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2963
92
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2964
92
      break;
2965
7
    case ARM::t2ORRrr:
2966
7
    case ARM::t2EORrr:
2967
7
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2968
4
        return false;
2969
3
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2970
3
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2971
3
      switch (UseOpc) {
2972
0
      default: break;
2973
3
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
2974
0
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
2975
3
      }
2976
3
      break;
2977
116
    }
2978
116
  }
2979
101
  }
2980
101
2981
101
  
unsigned OpIdx = Commute ? 101
20
:
1101
;
2982
28.7k
  unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
2983
28.7k
  bool isKill = UseMI.getOperand(OpIdx).isKill();
2984
28.7k
  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
2985
28.7k
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
2986
28.7k
          NewReg)
2987
28.7k
      .addReg(Reg1, getKillRegState(isKill))
2988
28.7k
      .addImm(SOImmValV1)
2989
28.7k
      .add(predOps(ARMCC::AL))
2990
28.7k
      .add(condCodeOp());
2991
28.7k
  UseMI.setDesc(get(NewUseOpc));
2992
28.7k
  UseMI.getOperand(1).setReg(NewReg);
2993
28.7k
  UseMI.getOperand(1).setIsKill();
2994
28.7k
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
2995
28.7k
  DefMI.eraseFromParent();
2996
28.7k
  return true;
2997
28.7k
}
2998
2999
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3000
0
                                        const MachineInstr &MI) {
3001
0
  switch (MI.getOpcode()) {
3002
0
  default: {
3003
0
    const MCInstrDesc &Desc = MI.getDesc();
3004
0
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3005
0
    assert(UOps >= 0 && "bad # UOps");
3006
0
    return UOps;
3007
0
  }
3008
0
3009
0
  case ARM::LDRrs:
3010
0
  case ARM::LDRBrs:
3011
0
  case ARM::STRrs:
3012
0
  case ARM::STRBrs: {
3013
0
    unsigned ShOpVal = MI.getOperand(3).getImm();
3014
0
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3015
0
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3016
0
    if (!isSub &&
3017
0
        (ShImm == 0 ||
3018
0
         
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3019
0
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3020
0
      return 1;
3021
0
    return 2;
3022
0
  }
3023
0
3024
0
  case ARM::LDRH:
3025
0
  case ARM::STRH: {
3026
0
    if (!MI.getOperand(2).getReg())
3027
0
      return 1;
3028
0
3029
0
    unsigned ShOpVal = MI.getOperand(3).getImm();
3030
0
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3031
0
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3032
0
    if (!isSub &&
3033
0
        (ShImm == 0 ||
3034
0
         
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3035
0
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3036
0
      return 1;
3037
0
    return 2;
3038
0
  }
3039
0
3040
0
  case ARM::LDRSB:
3041
0
  case ARM::LDRSH:
3042
0
    return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 
30
:
20
;
3043
0
3044
0
  case ARM::LDRSB_POST:
3045
0
  case ARM::LDRSH_POST: {
3046
0
    unsigned Rt = MI.getOperand(0).getReg();
3047
0
    unsigned Rm = MI.getOperand(3).getReg();
3048
0
    return (Rt == Rm) ? 
40
:
30
;
3049
0
  }
3050
0
3051
0
  case ARM::LDR_PRE_REG:
3052
0
  case ARM::LDRB_PRE_REG: {
3053
0
    unsigned Rt = MI.getOperand(0).getReg();
3054
0
    unsigned Rm = MI.getOperand(3).getReg();
3055
0
    if (Rt == Rm)
3056
0
      return 3;
3057
0
    unsigned ShOpVal = MI.getOperand(4).getImm();
3058
0
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3059
0
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3060
0
    if (!isSub &&
3061
0
        (ShImm == 0 ||
3062
0
         
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3063
0
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3064
0
      return 2;
3065
0
    return 3;
3066
0
  }
3067
0
3068
0
  case ARM::STR_PRE_REG:
3069
0
  case ARM::STRB_PRE_REG: {
3070
0
    unsigned ShOpVal = MI.getOperand(4).getImm();
3071
0
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3072
0
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3073
0
    if (!isSub &&
3074
0
        (ShImm == 0 ||
3075
0
         
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3076
0
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3077
0
      return 2;
3078
0
    return 3;
3079
0
  }
3080
0
3081
0
  case ARM::LDRH_PRE:
3082
0
  case ARM::STRH_PRE: {
3083
0
    unsigned Rt = MI.getOperand(0).getReg();
3084
0
    unsigned Rm = MI.getOperand(3).getReg();
3085
0
    if (!Rm)
3086
0
      return 2;
3087
0
    
if (0
Rt == Rm0
)
3088
0
      return 3;
3089
0
    
return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0
30
:
20
;
3090
0
  }
3091
0
3092
0
  case ARM::LDR_POST_REG:
3093
0
  case ARM::LDRB_POST_REG:
3094
0
  case ARM::LDRH_POST: {
3095
0
    unsigned Rt = MI.getOperand(0).getReg();
3096
0
    unsigned Rm = MI.getOperand(3).getReg();
3097
0
    return (Rt == Rm) ? 
30
:
20
;
3098
0
  }
3099
0
3100
0
  case ARM::LDR_PRE_IMM:
3101
0
  case ARM::LDRB_PRE_IMM:
3102
0
  case ARM::LDR_POST_IMM:
3103
0
  case ARM::LDRB_POST_IMM:
3104
0
  case ARM::STRB_POST_IMM:
3105
0
  case ARM::STRB_POST_REG:
3106
0
  case ARM::STRB_PRE_IMM:
3107
0
  case ARM::STRH_POST:
3108
0
  case ARM::STR_POST_IMM:
3109
0
  case ARM::STR_POST_REG:
3110
0
  case ARM::STR_PRE_IMM:
3111
0
    return 2;
3112
0
3113
0
  case ARM::LDRSB_PRE:
3114
0
  case ARM::LDRSH_PRE: {
3115
0
    unsigned Rm = MI.getOperand(3).getReg();
3116
0
    if (Rm == 0)
3117
0
      return 3;
3118
0
    unsigned Rt = MI.getOperand(0).getReg();
3119
0
    if (Rt == Rm)
3120
0
      return 4;
3121
0
    unsigned ShOpVal = MI.getOperand(4).getImm();
3122
0
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3123
0
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3124
0
    if (!isSub &&
3125
0
        (ShImm == 0 ||
3126
0
         
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3127
0
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3128
0
      return 3;
3129
0
    return 4;
3130
0
  }
3131
0
3132
0
  case ARM::LDRD: {
3133
0
    unsigned Rt = MI.getOperand(0).getReg();
3134
0
    unsigned Rn = MI.getOperand(2).getReg();
3135
0
    unsigned Rm = MI.getOperand(3).getReg();
3136
0
    if (Rm)
3137
0
      
return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0
40
3138
0
                                                                          : 3;
3139
0
    
return (Rt == Rn) ? 0
30
:
20
;
3140
0
  }
3141
0
3142
0
  case ARM::STRD: {
3143
0
    unsigned Rm = MI.getOperand(3).getReg();
3144
0
    if (Rm)
3145
0
      
return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 0
40
3146
0
                                                                          : 3;
3147
0
    return 2;
3148
0
  }
3149
0
3150
0
  case ARM::LDRD_POST:
3151
0
  case ARM::t2LDRD_POST:
3152
0
    return 3;
3153
0
3154
0
  case ARM::STRD_POST:
3155
0
  case ARM::t2STRD_POST:
3156
0
    return 4;
3157
0
3158
0
  case ARM::LDRD_PRE: {
3159
0
    unsigned Rt = MI.getOperand(0).getReg();
3160
0
    unsigned Rn = MI.getOperand(3).getReg();
3161
0
    unsigned Rm = MI.getOperand(4).getReg();
3162
0
    if (Rm)
3163
0
      
return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 0
50
3164
0
                                                                          : 4;
3165
0
    
return (Rt == Rn) ? 0
40
:
30
;
3166
0
  }
3167
0
3168
0
  case ARM::t2LDRD_PRE: {
3169
0
    unsigned Rt = MI.getOperand(0).getReg();
3170
0
    unsigned Rn = MI.getOperand(3).getReg();
3171
0
    return (Rt == Rn) ? 
40
:
30
;
3172
0
  }
3173
0
3174
0
  case ARM::STRD_PRE: {
3175
0
    unsigned Rm = MI.getOperand(4).getReg();
3176
0
    if (Rm)
3177
0
      
return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 0
50
3178
0
                                                                          : 4;
3179
0
    return 3;
3180
0
  }
3181
0
3182
0
  case ARM::t2STRD_PRE:
3183
0
    return 3;
3184
0
3185
0
  case ARM::t2LDR_POST:
3186
0
  case ARM::t2LDRB_POST:
3187
0
  case ARM::t2LDRB_PRE:
3188
0
  case ARM::t2LDRSBi12:
3189
0
  case ARM::t2LDRSBi8:
3190
0
  case ARM::t2LDRSBpci:
3191
0
  case ARM::t2LDRSBs:
3192
0
  case ARM::t2LDRH_POST:
3193
0
  case ARM::t2LDRH_PRE:
3194
0
  case ARM::t2LDRSBT:
3195
0
  case ARM::t2LDRSB_POST:
3196
0
  case ARM::t2LDRSB_PRE:
3197
0
  case ARM::t2LDRSH_POST:
3198
0
  case ARM::t2LDRSH_PRE:
3199
0
  case ARM::t2LDRSHi12:
3200
0
  case ARM::t2LDRSHi8:
3201
0
  case ARM::t2LDRSHpci:
3202
0
  case ARM::t2LDRSHs:
3203
0
    return 2;
3204
0
3205
0
  case ARM::t2LDRDi8: {
3206
0
    unsigned Rt = MI.getOperand(0).getReg();
3207
0
    unsigned Rn = MI.getOperand(2).getReg();
3208
0
    return (Rt == Rn) ? 
30
:
20
;
3209
0
  }
3210
0
3211
0
  case ARM::t2STRB_POST:
3212
0
  case ARM::t2STRB_PRE:
3213
0
  case ARM::t2STRBs:
3214
0
  case ARM::t2STRDi8:
3215
0
  case ARM::t2STRH_POST:
3216
0
  case ARM::t2STRH_PRE:
3217
0
  case ARM::t2STRHs:
3218
0
  case ARM::t2STR_POST:
3219
0
  case ARM::t2STR_PRE:
3220
0
  case ARM::t2STRs:
3221
0
    return 2;
3222
0
  }
3223
0
}
3224
3225
// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3226
// can't be easily determined return 0 (missing MachineMemOperand).
3227
//
3228
// FIXME: The current MachineInstr design does not support relying on machine
3229
// mem operands to determine the width of a memory access. Instead, we expect
3230
// the target to provide this information based on the instruction opcode and
3231
// operands. However, using MachineMemOperand is the best solution now for
3232
// two reasons:
3233
//
3234
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3235
// operands. This is much more dangerous than using the MachineMemOperand
3236
// sizes because CodeGen passes can insert/remove optional machine operands. In
3237
// fact, it's totally incorrect for preRA passes and appears to be wrong for
3238
// postRA passes as well.
3239
//
3240
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3241
// machine model that calls this should handle the unknown (zero size) case.
3242
//
3243
// Long term, we should require a target hook that verifies MachineMemOperand
3244
// sizes during MC lowering. That target hook should be local to MC lowering
3245
// because we can't ensure that it is aware of other MI forms. Doing this will
3246
// ensure that MachineMemOperands are correctly propagated through all passes.
3247
26.3k
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3248
26.3k
  unsigned Size = 0;
3249
26.3k
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3250
26.3k
                                  E = MI.memoperands_end();
3251
26.3k
       
I != E26.3k
;
++I36
) {
3252
36
    Size += (*I)->getSize();
3253
36
  }
3254
26.3k
  return Size / 4;
3255
26.3k
}
3256
3257
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3258
0
                                                    unsigned NumRegs) {
3259
0
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
3260
0
  switch (Opc) {
3261
0
  default:
3262
0
    break;
3263
0
  case ARM::VLDMDIA_UPD:
3264
0
  case ARM::VLDMDDB_UPD:
3265
0
  case ARM::VLDMSIA_UPD:
3266
0
  case ARM::VLDMSDB_UPD:
3267
0
  case ARM::VSTMDIA_UPD:
3268
0
  case ARM::VSTMDDB_UPD:
3269
0
  case ARM::VSTMSIA_UPD:
3270
0
  case ARM::VSTMSDB_UPD:
3271
0
  case ARM::LDMIA_UPD:
3272
0
  case ARM::LDMDA_UPD:
3273
0
  case ARM::LDMDB_UPD:
3274
0
  case ARM::LDMIB_UPD:
3275
0
  case ARM::STMIA_UPD:
3276
0
  case ARM::STMDA_UPD:
3277
0
  case ARM::STMDB_UPD:
3278
0
  case ARM::STMIB_UPD:
3279
0
  case ARM::tLDMIA_UPD:
3280
0
  case ARM::tSTMIA_UPD:
3281
0
  case ARM::t2LDMIA_UPD:
3282
0
  case ARM::t2LDMDB_UPD:
3283
0
  case ARM::t2STMIA_UPD:
3284
0
  case ARM::t2STMDB_UPD:
3285
0
    ++UOps; // One for base register writeback.
3286
0
    break;
3287
0
  case ARM::LDMIA_RET:
3288
0
  case ARM::tPOP_RET:
3289
0
  case ARM::t2LDMIA_RET:
3290
0
    UOps += 2; // One for base reg wb, one for write to pc.
3291
0
    break;
3292
0
  }
3293
0
  return UOps;
3294
0
}
3295
3296
unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3297
7.99k
                                          const MachineInstr &MI) const {
3298
7.99k
  if (
!ItinData || 7.99k
ItinData->isEmpty()7.99k
)
3299
0
    return 1;
3300
7.99k
3301
7.99k
  const MCInstrDesc &Desc = MI.getDesc();
3302
7.99k
  unsigned Class = Desc.getSchedClass();
3303
7.99k
  int ItinUOps = ItinData->getNumMicroOps(Class);
3304
7.99k
  if (
ItinUOps >= 07.99k
) {
3305
0
    if (
Subtarget.isSwift() && 0
(Desc.mayLoad() || 0
Desc.mayStore()0
))
3306
0
      return getNumMicroOpsSwiftLdSt(ItinData, MI);
3307
0
3308
0
    return ItinUOps;
3309
0
  }
3310
7.99k
3311
7.99k
  unsigned Opc = MI.getOpcode();
3312
7.99k
  switch (Opc) {
3313
0
  default:
3314
0
    llvm_unreachable("Unexpected multi-uops instruction!");
3315
0
  case ARM::VLDMQIA:
3316
0
  case ARM::VSTMQIA:
3317
0
    return 2;
3318
0
3319
0
  // The number of uOps for load / store multiple are determined by the number
3320
0
  // registers.
3321
0
  //
3322
0
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3323
0
  // same cycle. The scheduling for the first load / store must be done
3324
0
  // separately by assuming the address is not 64-bit aligned.
3325
0
  //
3326
0
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3327
0
  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
3328
0
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3329
374
  case ARM::VLDMDIA:
3330
374
  case ARM::VLDMDIA_UPD:
3331
374
  case ARM::VLDMDDB_UPD:
3332
374
  case ARM::VLDMSIA:
3333
374
  case ARM::VLDMSIA_UPD:
3334
374
  case ARM::VLDMSDB_UPD:
3335
374
  case ARM::VSTMDIA:
3336
374
  case ARM::VSTMDIA_UPD:
3337
374
  case ARM::VSTMDDB_UPD:
3338
374
  case ARM::VSTMSIA:
3339
374
  case ARM::VSTMSIA_UPD:
3340
374
  case ARM::VSTMSDB_UPD: {
3341
374
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3342
374
    return (NumRegs / 2) + (NumRegs % 2) + 1;
3343
374
  }
3344
374
3345
7.62k
  case ARM::LDMIA_RET:
3346
7.62k
  case ARM::LDMIA:
3347
7.62k
  case ARM::LDMDA:
3348
7.62k
  case ARM::LDMDB:
3349
7.62k
  case ARM::LDMIB:
3350
7.62k
  case ARM::LDMIA_UPD:
3351
7.62k
  case ARM::LDMDA_UPD:
3352
7.62k
  case ARM::LDMDB_UPD:
3353
7.62k
  case ARM::LDMIB_UPD:
3354
7.62k
  case ARM::STMIA:
3355
7.62k
  case ARM::STMDA:
3356
7.62k
  case ARM::STMDB:
3357
7.62k
  case ARM::STMIB:
3358
7.62k
  case ARM::STMIA_UPD:
3359
7.62k
  case ARM::STMDA_UPD:
3360
7.62k
  case ARM::STMDB_UPD:
3361
7.62k
  case ARM::STMIB_UPD:
3362
7.62k
  case ARM::tLDMIA:
3363
7.62k
  case ARM::tLDMIA_UPD:
3364
7.62k
  case ARM::tSTMIA_UPD:
3365
7.62k
  case ARM::tPOP_RET:
3366
7.62k
  case ARM::tPOP:
3367
7.62k
  case ARM::tPUSH:
3368
7.62k
  case ARM::t2LDMIA_RET:
3369
7.62k
  case ARM::t2LDMIA:
3370
7.62k
  case ARM::t2LDMDB:
3371
7.62k
  case ARM::t2LDMIA_UPD:
3372
7.62k
  case ARM::t2LDMDB_UPD:
3373
7.62k
  case ARM::t2STMIA:
3374
7.62k
  case ARM::t2STMDB:
3375
7.62k
  case ARM::t2STMIA_UPD:
3376
7.62k
  case ARM::t2STMDB_UPD: {
3377
7.62k
    unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3378
7.62k
    switch (Subtarget.getLdStMultipleTiming()) {
3379
0
    case ARMSubtarget::SingleIssuePlusExtras:
3380
0
      return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3381
5.47k
    case ARMSubtarget::SingleIssue:
3382
5.47k
      // Assume the worst.
3383
5.47k
      return NumRegs;
3384
2.08k
    case ARMSubtarget::DoubleIssue: {
3385
2.08k
      if (NumRegs < 4)
3386
734
        return 2;
3387
1.35k
      // 4 registers would be issued: 2, 2.
3388
1.35k
      // 5 registers would be issued: 2, 2, 1.
3389
1.35k
      unsigned UOps = (NumRegs / 2);
3390
1.35k
      if (NumRegs % 2)
3391
802
        ++UOps;
3392
1.35k
      return UOps;
3393
1.35k
    }
3394
63
    case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3395
63
      unsigned UOps = (NumRegs / 2);
3396
63
      // If there are odd number of registers or if it's not 64-bit aligned,
3397
63
      // then it takes an extra AGU (Address Generation Unit) cycle.
3398
63
      if (
(NumRegs % 2) || 63
!MI.hasOneMemOperand()49
||
3399
0
          (*MI.memoperands_begin())->getAlignment() < 8)
3400
63
        ++UOps;
3401
63
      return UOps;
3402
0
      }
3403
0
    }
3404
0
  }
3405
0
  }
3406
0
  
llvm_unreachable0
("Didn't find the number of microops");
3407
0
}
3408
3409
int
3410
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3411
                                  const MCInstrDesc &DefMCID,
3412
                                  unsigned DefClass,
3413
221
                                  unsigned DefIdx, unsigned DefAlign) const {
3414
221
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3415
221
  if (RegNo <= 0)
3416
221
    // Def is the address writeback.
3417
0
    return ItinData->getOperandCycle(DefClass, DefIdx);
3418
221
3419
221
  int DefCycle;
3420
221
  if (
Subtarget.isCortexA8() || 221
Subtarget.isCortexA7()202
) {
3421
19
    // (regno / 2) + (regno % 2) + 1
3422
19
    DefCycle = RegNo / 2 + 1;
3423
19
    if (RegNo % 2)
3424
8
      ++DefCycle;
3425
221
  } else 
if (202
Subtarget.isLikeA9() || 202
Subtarget.isSwift()193
) {
3426
9
    DefCycle = RegNo;
3427
9
    bool isSLoad = false;
3428
9
3429
9
    switch (DefMCID.getOpcode()) {
3430
9
    default: break;
3431
0
    case ARM::VLDMSIA:
3432
0
    case ARM::VLDMSIA_UPD:
3433
0
    case ARM::VLDMSDB_UPD:
3434
0
      isSLoad = true;
3435
0
      break;
3436
9
    }
3437
9
3438
9
    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3439
9
    // then it takes an extra cycle.
3440
9
    
if (9
(isSLoad && 9
(RegNo % 2)0
) ||
DefAlign < 89
)
3441
0
      ++DefCycle;
3442
202
  } else {
3443
193
    // Assume the worst.
3444
193
    DefCycle = RegNo + 2;
3445
193
  }
3446
221
3447
221
  return DefCycle;
3448
221
}
3449
3450
0
bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
3451
0
  unsigned BaseReg = MI.getOperand(0).getReg();
3452
0
  for (unsigned i = 1, sz = MI.getNumOperands(); 
i < sz0
;
++i0
) {
3453
0
    const auto &Op = MI.getOperand(i);
3454
0
    if (
Op.isReg() && 0
Op.getReg() == BaseReg0
)
3455
0
      return true;
3456
0
  }
3457
0
  return false;
3458
0
}
3459
unsigned
3460
7
ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
3461
7
  // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops
3462
7
  // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
3463
7
  return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3464
7
}
3465
3466
int
3467
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3468
                                 const MCInstrDesc &DefMCID,
3469
                                 unsigned DefClass,
3470
412
                                 unsigned DefIdx, unsigned DefAlign) const {
3471
412
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3472
412
  if (RegNo <= 0)
3473
412
    // Def is the address writeback.
3474
0
    return ItinData->getOperandCycle(DefClass, DefIdx);
3475
412
3476
412
  int DefCycle;
3477
412
  if (
Subtarget.isCortexA8() || 412
Subtarget.isCortexA7()406
) {
3478
22
    // 4 registers would be issued: 1, 2, 1.
3479
22
    // 5 registers would be issued: 1, 2, 2.
3480
22
    DefCycle = RegNo / 2;
3481
22
    if (DefCycle < 1)
3482
6
      DefCycle = 1;
3483
22
    // Result latency is issue cycle + 2: E2.
3484
22
    DefCycle += 2;
3485
412
  } else 
if (390
Subtarget.isLikeA9() || 390
Subtarget.isSwift()384
) {
3486
6
    DefCycle = (RegNo / 2);
3487
6
    // If there are odd number of registers or if it's not 64-bit aligned,
3488
6
    // then it takes an extra AGU (Address Generation Unit) cycle.
3489
6
    if (
(RegNo % 2) || 6
DefAlign < 83
)
3490
6
      ++DefCycle;
3491
6
    // Result latency is AGU cycles + 2.
3492
6
    DefCycle += 2;
3493
390
  } else {
3494
384
    // Assume the worst.
3495
384
    DefCycle = RegNo + 2;
3496
384
  }
3497
412
3498
412
  return DefCycle;
3499
412
}
3500
3501
int
3502
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3503
                                  const MCInstrDesc &UseMCID,
3504
                                  unsigned UseClass,
3505
19
                                  unsigned UseIdx, unsigned UseAlign) const {
3506
19
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3507
19
  if (RegNo <= 0)
3508
0
    return ItinData->getOperandCycle(UseClass, UseIdx);
3509
19
3510
19
  int UseCycle;
3511
19
  if (
Subtarget.isCortexA8() || 19
Subtarget.isCortexA7()6
) {
3512
13
    // (regno / 2) + (regno % 2) + 1
3513
13
    UseCycle = RegNo / 2 + 1;
3514
13
    if (RegNo % 2)
3515
5
      ++UseCycle;
3516
19
  } else 
if (6
Subtarget.isLikeA9() || 6
Subtarget.isSwift()3
) {
3517
3
    UseCycle = RegNo;
3518
3
    bool isSStore = false;
3519
3
3520
3
    switch (UseMCID.getOpcode()) {
3521
3
    default: break;
3522
0
    case ARM::VSTMSIA:
3523
0
    case ARM::VSTMSIA_UPD:
3524
0
    case ARM::VSTMSDB_UPD:
3525
0
      isSStore = true;
3526
0
      break;
3527
3
    }
3528
3
3529
3
    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3530
3
    // then it takes an extra cycle.
3531
3
    
if (3
(isSStore && 3
(RegNo % 2)0
) ||
UseAlign < 83
)
3532
0
      ++UseCycle;
3533
6
  } else {
3534
3
    // Assume the worst.
3535
3
    UseCycle = RegNo + 2;
3536
3
  }
3537
19
3538
19
  return UseCycle;
3539
19
}
3540
3541
int
3542
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3543
                                 const MCInstrDesc &UseMCID,
3544
                                 unsigned UseClass,
3545
474
                                 unsigned UseIdx, unsigned UseAlign) const {
3546
474
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3547
474
  if (RegNo <= 0)
3548
0
    return ItinData->getOperandCycle(UseClass, UseIdx);
3549
474
3550
474
  int UseCycle;
3551
474
  if (
Subtarget.isCortexA8() || 474
Subtarget.isCortexA7()461
) {
3552
164
    UseCycle = RegNo / 2;
3553
164
    if (UseCycle < 2)
3554
159
      UseCycle = 2;
3555
164
    // Read in E3.
3556
164
    UseCycle += 2;
3557
474
  } else 
if (310
Subtarget.isLikeA9() || 310
Subtarget.isSwift()310
) {
3558
0
    UseCycle = (RegNo / 2);
3559
0
    // If there are odd number of registers or if it's not 64-bit aligned,
3560
0
    // then it takes an extra AGU (Address Generation Unit) cycle.
3561
0
    if (
(RegNo % 2) || 0
UseAlign < 80
)
3562
0
      ++UseCycle;
3563
310
  } else {
3564
310
    // Assume the worst.
3565
310
    UseCycle = 1;
3566
310
  }
3567
474
  return UseCycle;
3568
474
}
3569
3570
int
3571
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3572
                                    const MCInstrDesc &DefMCID,
3573
                                    unsigned DefIdx, unsigned DefAlign,
3574
                                    const MCInstrDesc &UseMCID,
3575
157k
                                    unsigned UseIdx, unsigned UseAlign) const {
3576
157k
  unsigned DefClass = DefMCID.getSchedClass();
3577
157k
  unsigned UseClass = UseMCID.getSchedClass();
3578
157k
3579
157k
  if (
DefIdx < DefMCID.getNumDefs() && 157k
UseIdx < UseMCID.getNumOperands()156k
)
3580
154k
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3581
2.41k
3582
2.41k
  // This may be a def / use of a variable_ops instruction, the operand
3583
2.41k
  // latency might be determinable dynamically. Let the target try to
3584
2.41k
  // figure it out.
3585
2.41k
  int DefCycle = -1;
3586
2.41k
  bool LdmBypass = false;
3587
2.41k
  switch (DefMCID.getOpcode()) {
3588
1.77k
  default:
3589
1.77k
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3590
1.77k
    break;
3591
2.41k
3592
221
  case ARM::VLDMDIA:
3593
221
  case ARM::VLDMDIA_UPD:
3594
221
  case ARM::VLDMDDB_UPD:
3595
221
  case ARM::VLDMSIA:
3596
221
  case ARM::VLDMSIA_UPD:
3597
221
  case ARM::VLDMSDB_UPD:
3598
221
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3599
221
    break;
3600
221
3601
412
  case ARM::LDMIA_RET:
3602
412
  case ARM::LDMIA:
3603
412
  case ARM::LDMDA:
3604
412
  case ARM::LDMDB:
3605
412
  case ARM::LDMIB:
3606
412
  case ARM::LDMIA_UPD:
3607
412
  case ARM::LDMDA_UPD:
3608
412
  case ARM::LDMDB_UPD:
3609
412
  case ARM::LDMIB_UPD:
3610
412
  case ARM::tLDMIA:
3611
412
  case ARM::tLDMIA_UPD:
3612
412
  case ARM::tPUSH:
3613
412
  case ARM::t2LDMIA_RET:
3614
412
  case ARM::t2LDMIA:
3615
412
  case ARM::t2LDMDB:
3616
412
  case ARM::t2LDMIA_UPD:
3617
412
  case ARM::t2LDMDB_UPD:
3618
412
    LdmBypass = true;
3619
412
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3620
412
    break;
3621
2.41k
  }
3622
2.41k
3623
2.41k
  
if (2.41k
DefCycle == -12.41k
)
3624
2.41k
    // We can't seem to determine the result latency of the def, assume it's 2.
3625
198
    DefCycle = 2;
3626
2.41k
3627
2.41k
  int UseCycle = -1;
3628
2.41k
  switch (UseMCID.getOpcode()) {
3629
1.91k
  default:
3630
1.91k
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3631
1.91k
    break;
3632
2.41k
3633
19
  case ARM::VSTMDIA:
3634
19
  case ARM::VSTMDIA_UPD:
3635
19
  case ARM::VSTMDDB_UPD:
3636
19
  case ARM::VSTMSIA:
3637
19
  case ARM::VSTMSIA_UPD:
3638
19
  case ARM::VSTMSDB_UPD:
3639
19
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3640
19
    break;
3641
19
3642
474
  case ARM::STMIA:
3643
474
  case ARM::STMDA:
3644
474
  case ARM::STMDB:
3645
474
  case ARM::STMIB:
3646
474
  case ARM::STMIA_UPD:
3647
474
  case ARM::STMDA_UPD:
3648
474
  case ARM::STMDB_UPD:
3649
474
  case ARM::STMIB_UPD:
3650
474
  case ARM::tSTMIA_UPD:
3651
474
  case ARM::tPOP_RET:
3652
474
  case ARM::tPOP:
3653
474
  case ARM::t2STMIA:
3654
474
  case ARM::t2STMDB:
3655
474
  case ARM::t2STMIA_UPD:
3656
474
  case ARM::t2STMDB_UPD:
3657
474
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3658
474
    break;
3659
2.41k
  }
3660
2.41k
3661
2.41k
  
if (2.41k
UseCycle == -12.41k
)
3662
2.41k
    // Assume it's read in the first stage.
3663
1.41k
    UseCycle = 1;
3664
2.41k
3665
2.41k
  UseCycle = DefCycle - UseCycle + 1;
3666
2.41k
  if (
UseCycle > 02.41k
) {
3667
2.16k
    if (
LdmBypass2.16k
) {
3668
410
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
3669
410
      // first def operand.
3670
410
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3671
410
                                          UseClass, UseIdx))
3672
0
        --UseCycle;
3673
2.16k
    } else 
if (1.75k
ItinData->hasPipelineForwarding(DefClass, DefIdx,
3674
1.75k
                                               UseClass, UseIdx)) {
3675
0
      --UseCycle;
3676
0
    }
3677
2.16k
  }
3678
157k
3679
157k
  return UseCycle;
3680
157k
}
3681
3682
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3683
                                           const MachineInstr *MI, unsigned Reg,
3684
1.99k
                                           unsigned &DefIdx, unsigned &Dist) {
3685
1.99k
  Dist = 0;
3686
1.99k
3687
1.99k
  MachineBasicBlock::const_iterator I = MI; ++I;
3688
1.99k
  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3689
1.99k
  assert(II->isInsideBundle() && "Empty bundle?");
3690
1.99k
3691
1.99k
  int Idx = -1;
3692
2.07k
  while (
II->isInsideBundle()2.07k
) {
3693
2.07k
    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3694
2.07k
    if (Idx != -1)
3695
1.99k
      break;
3696
85
    --II;
3697
85
    ++Dist;
3698
85
  }
3699
1.99k
3700
1.99k
  assert(Idx != -1 && "Cannot find bundled definition!");
3701
1.99k
  DefIdx = Idx;
3702
1.99k
  return &*II;
3703
1.99k
}
3704
3705
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3706
                                           const MachineInstr &MI, unsigned Reg,
3707
6.59k
                                           unsigned &UseIdx, unsigned &Dist) {
3708
6.59k
  Dist = 0;
3709
6.59k
3710
6.59k
  MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3711
6.59k
  assert(II->isInsideBundle() && "Empty bundle?");
3712
6.59k
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3713
6.59k
3714
6.59k
  // FIXME: This doesn't properly handle multiple uses.
3715
6.59k
  int Idx = -1;
3716
13.5k
  while (
II != E && 13.5k
II->isInsideBundle()13.5k
) {
3717
13.3k
    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3718
13.3k
    if (Idx != -1)
3719
6.47k
      break;
3720
6.92k
    
if (6.92k
II->getOpcode() != ARM::t2IT6.92k
)
3721
330
      ++Dist;
3722
13.3k
    ++II;
3723
13.3k
  }
3724
6.59k
3725
6.59k
  if (
Idx == -16.59k
) {
3726
112
    Dist = 0;
3727
112
    return nullptr;
3728
112
  }
3729
6.47k
3730
6.47k
  UseIdx = Idx;
3731
6.47k
  return &*II;
3732
6.47k
}
3733
3734
/// Return the number of cycles to add to (or subtract from) the static
3735
/// itinerary based on the def opcode and alignment. The caller will ensure that
3736
/// adjusted latency is at least one cycle.
3737
static int adjustDefLatency(const ARMSubtarget &Subtarget,
3738
                            const MachineInstr &DefMI,
3739
407k
                            const MCInstrDesc &DefMCID, unsigned DefAlign) {
3740
407k
  int Adjust = 0;
3741
407k
  if (
Subtarget.isCortexA8() || 407k
Subtarget.isLikeA9()390k
||
Subtarget.isCortexA7()385k
) {
3742
137k
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3743
137k
    // variants are one cycle cheaper.
3744
137k
    switch (DefMCID.getOpcode()) {
3745
136k
    default: break;
3746
58
    case ARM::LDRrs:
3747
58
    case ARM::LDRBrs: {
3748
58
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
3749
58
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3750
58
      if (ShImm == 0 ||
3751
43
          
(ShImm == 2 && 43
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl27
))
3752
42
        --Adjust;
3753
58
      break;
3754
58
    }
3755
1.16k
    case ARM::t2LDRs:
3756
1.16k
    case ARM::t2LDRBs:
3757
1.16k
    case ARM::t2LDRHs:
3758
1.16k
    case ARM::t2LDRSHs: {
3759
1.16k
      // Thumb2 mode: lsl only.
3760
1.16k
      unsigned ShAmt = DefMI.getOperand(3).getImm();
3761
1.16k
      if (
ShAmt == 0 || 1.16k
ShAmt == 2533
)
3762
1.09k
        --Adjust;
3763
1.16k
      break;
3764
407k
    }
3765
137k
    }
3766
269k
  } else 
if (269k
Subtarget.isSwift()269k
) {
3767
46
    // FIXME: Properly handle all of the latency adjustments for address
3768
46
    // writeback.
3769
46
    switch (DefMCID.getOpcode()) {
3770
46
    default: break;
3771
0
    case ARM::LDRrs:
3772
0
    case ARM::LDRBrs: {
3773
0
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
3774
0
      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3775
0
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3776
0
      if (!isSub &&
3777
0
          (ShImm == 0 ||
3778
0
           
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
3779
0
            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3780
0
        Adjust -= 2;
3781
0
      else 
if (0
!isSub &&
3782
0
               
ShImm == 10
&&
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr0
)
3783
0
        --Adjust;
3784
0
      break;
3785
0
    }
3786
0
    case ARM::t2LDRs:
3787
0
    case ARM::t2LDRBs:
3788
0
    case ARM::t2LDRHs:
3789
0
    case ARM::t2LDRSHs: {
3790
0
      // Thumb2 mode: lsl only.
3791
0
      unsigned ShAmt = DefMI.getOperand(3).getImm();
3792
0
      if (
ShAmt == 0 || 0
ShAmt == 10
||
ShAmt == 20
||
ShAmt == 30
)
3793
0
        Adjust -= 2;
3794
0
      break;
3795
407k
    }
3796
269k
    }
3797
269k
  }
3798
407k
3799
407k
  
if (407k
DefAlign < 8 && 407k
Subtarget.checkVLDnAccessAlignment()386k
) {
3800
4.32k
    switch (DefMCID.getOpcode()) {
3801
4.09k
    default: break;
3802
235
    case ARM::VLD1q8:
3803
235
    case ARM::VLD1q16:
3804
235
    case ARM::VLD1q32:
3805
235
    case ARM::VLD1q64:
3806
235
    case ARM::VLD1q8wb_fixed:
3807
235
    case ARM::VLD1q16wb_fixed:
3808
235
    case ARM::VLD1q32wb_fixed:
3809
235
    case ARM::VLD1q64wb_fixed:
3810
235
    case ARM::VLD1q8wb_register:
3811
235
    case ARM::VLD1q16wb_register:
3812
235
    case ARM::VLD1q32wb_register:
3813
235
    case ARM::VLD1q64wb_register:
3814
235
    case ARM::VLD2d8:
3815
235
    case ARM::VLD2d16:
3816
235
    case ARM::VLD2d32:
3817
235
    case ARM::VLD2q8:
3818
235
    case ARM::VLD2q16:
3819
235
    case ARM::VLD2q32:
3820
235
    case ARM::VLD2d8wb_fixed:
3821
235
    case ARM::VLD2d16wb_fixed:
3822
235
    case ARM::VLD2d32wb_fixed:
3823
235
    case ARM::VLD2q8wb_fixed:
3824
235
    case ARM::VLD2q16wb_fixed:
3825
235
    case ARM::VLD2q32wb_fixed:
3826
235
    case ARM::VLD2d8wb_register:
3827
235
    case ARM::VLD2d16wb_register:
3828
235
    case ARM::VLD2d32wb_register:
3829
235
    case ARM::VLD2q8wb_register:
3830
235
    case ARM::VLD2q16wb_register:
3831
235
    case ARM::VLD2q32wb_register:
3832
235
    case ARM::VLD3d8:
3833
235
    case ARM::VLD3d16:
3834
235
    case ARM::VLD3d32:
3835
235
    case ARM::VLD1d64T:
3836
235
    case ARM::VLD3d8_UPD:
3837
235
    case ARM::VLD3d16_UPD:
3838
235
    case ARM::VLD3d32_UPD:
3839
235
    case ARM::VLD1d64Twb_fixed:
3840
235
    case ARM::VLD1d64Twb_register:
3841
235
    case ARM::VLD3q8_UPD:
3842
235
    case ARM::VLD3q16_UPD:
3843
235
    case ARM::VLD3q32_UPD:
3844
235
    case ARM::VLD4d8:
3845
235
    case ARM::VLD4d16:
3846
235
    case ARM::VLD4d32:
3847
235
    case ARM::VLD1d64Q:
3848
235
    case ARM::VLD4d8_UPD:
3849
235
    case ARM::VLD4d16_UPD:
3850
235
    case ARM::VLD4d32_UPD:
3851
235
    case ARM::VLD1d64Qwb_fixed:
3852
235
    case ARM::VLD1d64Qwb_register:
3853
235
    case ARM::VLD4q8_UPD:
3854
235
    case ARM::VLD4q16_UPD:
3855
235
    case ARM::VLD4q32_UPD:
3856
235
    case ARM::VLD1DUPq8:
3857
235
    case ARM::VLD1DUPq16:
3858
235
    case ARM::VLD1DUPq32:
3859
235
    case ARM::VLD1DUPq8wb_fixed:
3860
235
    case ARM::VLD1DUPq16wb_fixed:
3861
235
    case ARM::VLD1DUPq32wb_fixed:
3862
235
    case ARM::VLD1DUPq8wb_register:
3863
235
    case ARM::VLD1DUPq16wb_register:
3864
235
    case ARM::VLD1DUPq32wb_register:
3865
235
    case ARM::VLD2DUPd8:
3866
235
    case ARM::VLD2DUPd16:
3867
235
    case ARM::VLD2DUPd32:
3868
235
    case ARM::VLD2DUPd8wb_fixed:
3869
235
    case ARM::VLD2DUPd16wb_fixed:
3870
235
    case ARM::VLD2DUPd32wb_fixed:
3871
235
    case ARM::VLD2DUPd8wb_register:
3872
235
    case ARM::VLD2DUPd16wb_register:
3873
235
    case ARM::VLD2DUPd32wb_register:
3874
235
    case ARM::VLD4DUPd8:
3875
235
    case ARM::VLD4DUPd16:
3876
235
    case ARM::VLD4DUPd32:
3877
235
    case ARM::VLD4DUPd8_UPD:
3878
235
    case ARM::VLD4DUPd16_UPD:
3879
235
    case ARM::VLD4DUPd32_UPD:
3880
235
    case ARM::VLD1LNd8:
3881
235
    case ARM::VLD1LNd16:
3882
235
    case ARM::VLD1LNd32:
3883
235
    case ARM::VLD1LNd8_UPD:
3884
235
    case ARM::VLD1LNd16_UPD:
3885
235
    case ARM::VLD1LNd32_UPD:
3886
235
    case ARM::VLD2LNd8:
3887
235
    case ARM::VLD2LNd16:
3888
235
    case ARM::VLD2LNd32:
3889
235
    case ARM::VLD2LNq16:
3890
235
    case ARM::VLD2LNq32:
3891
235
    case ARM::VLD2LNd8_UPD:
3892
235
    case ARM::VLD2LNd16_UPD:
3893
235
    case ARM::VLD2LNd32_UPD:
3894
235
    case ARM::VLD2LNq16_UPD:
3895
235
    case ARM::VLD2LNq32_UPD:
3896
235
    case ARM::VLD4LNd8:
3897
235
    case ARM::VLD4LNd16:
3898
235
    case ARM::VLD4LNd32:
3899
235
    case ARM::VLD4LNq16:
3900
235
    case ARM::VLD4LNq32:
3901
235
    case ARM::VLD4LNd8_UPD:
3902
235
    case ARM::VLD4LNd16_UPD:
3903
235
    case ARM::VLD4LNd32_UPD:
3904
235
    case ARM::VLD4LNq16_UPD:
3905
235
    case ARM::VLD4LNq32_UPD:
3906
235
      // If the address is not 64-bit aligned, the latencies of these
3907
235
      // instructions increases by one.
3908
235
      ++Adjust;
3909
235
      break;
3910
407k
    }
3911
407k
  }
3912
407k
  return Adjust;
3913
407k
}
3914
3915
int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3916
                                        const MachineInstr &DefMI,
3917
                                        unsigned DefIdx,
3918
                                        const MachineInstr &UseMI,
3919
116k
                                        unsigned UseIdx) const {
3920
116k
  // No operand latency. The caller may fall back to getInstrLatency.
3921
116k
  if (
!ItinData || 116k
ItinData->isEmpty()116k
)
3922
0
    return -1;
3923
116k
3924
116k
  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
3925
116k
  unsigned Reg = DefMO.getReg();
3926
116k
3927
116k
  const MachineInstr *ResolvedDefMI = &DefMI;
3928
116k
  unsigned DefAdj = 0;
3929
116k
  if (DefMI.isBundle())
3930
1.99k
    ResolvedDefMI =
3931
1.99k
        getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
3932
116k
  if (
ResolvedDefMI->isCopyLike() || 116k
ResolvedDefMI->isInsertSubreg()116k
||
3933
116k
      
ResolvedDefMI->isRegSequence()116k
||
ResolvedDefMI->isImplicitDef()116k
) {
3934
60
    return 1;
3935
60
  }
3936
116k
3937
116k
  const MachineInstr *ResolvedUseMI = &UseMI;
3938
116k
  unsigned UseAdj = 0;
3939
116k
  if (
UseMI.isBundle()116k
) {
3940
6.59k
    ResolvedUseMI =
3941
6.59k
        getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
3942
6.59k
    if (!ResolvedUseMI)
3943
112
      return -1;
3944
116k
  }
3945
116k
3946
116k
  return getOperandLatencyImpl(
3947
116k
      ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
3948
116k
      Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
3949
116k
}
3950
3951
int ARMBaseInstrInfo::getOperandLatencyImpl(
3952
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
3953
    unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
3954
    const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
3955
116k
    unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
3956
116k
  if (
Reg == ARM::CPSR116k
) {
3957
5.46k
    if (
DefMI.getOpcode() == ARM::FMSTAT5.46k
) {
3958
580
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
3959
580
      return Subtarget.isLikeA9() ? 
15
:
20575
;
3960
580
    }
3961
4.88k
3962
4.88k
    // CPSR set and branch can be paired in the same cycle.
3963
4.88k
    
if (4.88k
UseMI.isBranch()4.88k
)
3964
0
      return 0;
3965
4.88k
3966
4.88k
    // Otherwise it takes the instruction latency (generally one).
3967
4.88k
    unsigned Latency = getInstrLatency(ItinData, DefMI);
3968
4.88k
3969
4.88k
    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
3970
4.88k
    // its uses. Instructions which are otherwise scheduled between them may
3971
4.88k
    // incur a code size penalty (not able to use the CPSR setting 16-bit
3972
4.88k
    // instructions).
3973
4.88k
    if (
Latency > 0 && 4.88k
Subtarget.isThumb2()4.87k
) {
3974
4.04k
      const MachineFunction *MF = DefMI.getParent()->getParent();
3975
4.04k
      // FIXME: Use Function::optForSize().
3976
4.04k
      if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
3977
947
        --Latency;
3978
4.04k
    }
3979
5.46k
    return Latency;
3980
5.46k
  }
3981
110k
3982
110k
  
if (110k
DefMO.isImplicit() || 110k
UseMI.getOperand(UseIdx).isImplicit()98.6k
)
3983
18.6k
    return -1;
3984
91.9k
3985
91.9k
  unsigned DefAlign = DefMI.hasOneMemOperand()
3986
20.2k
                          ? (*DefMI.memoperands_begin())->getAlignment()
3987
71.6k
                          : 0;
3988
91.9k
  unsigned UseAlign = UseMI.hasOneMemOperand()
3989
15.8k
                          ? (*UseMI.memoperands_begin())->getAlignment()
3990
76.0k
                          : 0;
3991
91.9k
3992
91.9k
  // Get the itinerary's latency if possible, and handle variable_ops.
3993
91.9k
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
3994
91.9k
                                  UseIdx, UseAlign);
3995
91.9k
  // Unable to find operand latency. The caller may resort to getInstrLatency.
3996
91.9k
  if (Latency < 0)
3997
17.7k
    return Latency;
3998
74.2k
3999
74.2k
  // Adjust for IT block position.
4000
74.2k
  int Adj = DefAdj + UseAdj;
4001
74.2k
4002
74.2k
  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4003
74.2k
  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4004
74.2k
  if (
Adj >= 0 || 74.2k
(int)Latency > -Adj349
) {
4005
74.2k
    return Latency + Adj;
4006
74.2k
  }
4007
0
  // Return the itinerary latency, which may be zero but not less than zero.
4008
0
  return Latency;
4009
0
}
4010
4011
int
4012
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4013
                                    SDNode *DefNode, unsigned DefIdx,
4014
204k
                                    SDNode *UseNode, unsigned UseIdx) const {
4015
204k
  if (!DefNode->isMachineOpcode())
4016
78.3k
    return 1;
4017
125k
4018
125k
  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4019
125k
4020
125k
  if (isZeroCost(DefMCID.Opcode))
4021
7.00k
    return 0;
4022
118k
4023
118k
  
if (118k
!ItinData || 118k
ItinData->isEmpty()118k
)
4024
7.71k
    
return DefMCID.mayLoad() ? 7.71k
3465
:
17.24k
;
4025
110k
4026
110k
  
if (110k
!UseNode->isMachineOpcode()110k
) {
4027
45.8k
    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4028
45.8k
    int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4029
45.8k
    int Threshold = 1 + Adj;
4030
45.8k
    return Latency <= Threshold ? 
137.7k
:
Latency - Adj8.08k
;
4031
45.8k
  }
4032
65.1k
4033
65.1k
  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4034
65.1k
  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4035
65.1k
  unsigned DefAlign = !DefMN->memoperands_empty()
4036
65.1k
    ? 
(*DefMN->memoperands_begin())->getAlignment()16.6k
:
048.5k
;
4037
65.1k
  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4038
65.1k
  unsigned UseAlign = !UseMN->memoperands_empty()
4039
65.1k
    ? 
(*UseMN->memoperands_begin())->getAlignment()19.6k
:
045.4k
;
4040
65.1k
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4041
65.1k
                                  UseMCID, UseIdx, UseAlign);
4042
65.1k
4043
65.1k
  if (Latency > 1 &&
4044
27.5k
      
(Subtarget.isCortexA8() || 27.5k
Subtarget.isLikeA9()26.5k
||
4045
65.1k
       
Subtarget.isCortexA7()25.9k
)) {
4046
9.47k
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4047
9.47k
    // variants are one cycle cheaper.
4048
9.47k
    switch (DefMCID.getOpcode()) {
4049
9.09k
    default: break;
4050
9
    case ARM::LDRrs:
4051
9
    case ARM::LDRBrs: {
4052
9
      unsigned ShOpVal =
4053
9
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4054
9
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4055
9
      if (ShImm == 0 ||
4056
7
          
(ShImm == 2 && 7
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl7
))
4057
9
        --Latency;
4058
9
      break;
4059
9
    }
4060
374
    case ARM::t2LDRs:
4061
374
    case ARM::t2LDRBs:
4062
374
    case ARM::t2LDRHs:
4063
374
    case ARM::t2LDRSHs: {
4064
374
      // Thumb2 mode: lsl only.
4065
374
      unsigned ShAmt =
4066
374
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4067
374
      if (
ShAmt == 0 || 374
ShAmt == 2163
)
4068
349
        --Latency;
4069
374
      break;
4070
65.1k
    }
4071
9.47k
    }
4072
55.6k
  } else 
if (55.6k
DefIdx == 0 && 55.6k
Latency > 254.8k
&&
Subtarget.isSwift()6.91k
) {
4073
0
    // FIXME: Properly handle all of the latency adjustments for address
4074
0
    // writeback.
4075
0
    switch (DefMCID.getOpcode()) {
4076
0
    default: break;
4077
0
    case ARM::LDRrs:
4078
0
    case ARM::LDRBrs: {
4079
0
      unsigned ShOpVal =
4080
0
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4081
0
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4082
0
      if (ShImm == 0 ||
4083
0
          
((ShImm == 1 || 0
ShImm == 20
||
ShImm == 30
) &&
4084
0
           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4085
0
        Latency -= 2;
4086
0
      else 
if (0
ShImm == 1 && 0
ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr0
)
4087
0
        --Latency;
4088
0
      break;
4089
0
    }
4090
0
    case ARM::t2LDRs:
4091
0
    case ARM::t2LDRBs:
4092
0
    case ARM::t2LDRHs:
4093
0
    case ARM::t2LDRSHs:
4094
0
      // Thumb2 mode: lsl 0-3 only.
4095
0
      Latency -= 2;
4096
0
      break;
4097
65.1k
    }
4098
65.1k
  }
4099
65.1k
4100
65.1k
  
if (65.1k
DefAlign < 8 && 65.1k
Subtarget.checkVLDnAccessAlignment()59.4k
)
4101
981
    switch (DefMCID.getOpcode()) {
4102
962
    default: break;
4103
19
    case ARM::VLD1q8:
4104
19
    case ARM::VLD1q16:
4105
19
    case ARM::VLD1q32:
4106
19
    case ARM::VLD1q64:
4107
19
    case ARM::VLD1q8wb_register:
4108
19
    case ARM::VLD1q16wb_register:
4109
19
    case ARM::VLD1q32wb_register:
4110
19
    case ARM::VLD1q64wb_register:
4111
19
    case ARM::VLD1q8wb_fixed:
4112
19
    case ARM::VLD1q16wb_fixed:
4113
19
    case ARM::VLD1q32wb_fixed:
4114
19
    case ARM::VLD1q64wb_fixed:
4115
19
    case ARM::VLD2d8:
4116
19
    case ARM::VLD2d16:
4117
19
    case ARM::VLD2d32:
4118
19
    case ARM::VLD2q8Pseudo:
4119
19
    case ARM::VLD2q16Pseudo:
4120
19
    case ARM::VLD2q32Pseudo:
4121
19
    case ARM::VLD2d8wb_fixed:
4122
19
    case ARM::VLD2d16wb_fixed:
4123
19
    case ARM::VLD2d32wb_fixed:
4124
19
    case ARM::VLD2q8PseudoWB_fixed:
4125
19
    case ARM::VLD2q16PseudoWB_fixed:
4126
19
    case ARM::VLD2q32PseudoWB_fixed:
4127
19
    case ARM::VLD2d8wb_register:
4128
19
    case ARM::VLD2d16wb_register:
4129
19
    case ARM::VLD2d32wb_register:
4130
19
    case ARM::VLD2q8PseudoWB_register:
4131
19
    case ARM::VLD2q16PseudoWB_register:
4132
19
    case ARM::VLD2q32PseudoWB_register:
4133
19
    case ARM::VLD3d8Pseudo:
4134
19
    case ARM::VLD3d16Pseudo:
4135
19
    case ARM::VLD3d32Pseudo:
4136
19
    case ARM::VLD1d64TPseudo:
4137
19
    case ARM::VLD1d64TPseudoWB_fixed:
4138
19
    case ARM::VLD3d8Pseudo_UPD:
4139
19
    case ARM::VLD3d16Pseudo_UPD:
4140
19
    case ARM::VLD3d32Pseudo_UPD:
4141
19
    case ARM::VLD3q8Pseudo_UPD:
4142
19
    case ARM::VLD3q16Pseudo_UPD:
4143
19
    case ARM::VLD3q32Pseudo_UPD:
4144
19
    case ARM::VLD3q8oddPseudo:
4145
19
    case ARM::VLD3q16oddPseudo:
4146
19
    case ARM::VLD3q32oddPseudo:
4147
19
    case ARM::VLD3q8oddPseudo_UPD:
4148
19
    case ARM::VLD3q16oddPseudo_UPD:
4149
19
    case ARM::VLD3q32oddPseudo_UPD:
4150
19
    case ARM::VLD4d8Pseudo:
4151
19
    case ARM::VLD4d16Pseudo:
4152
19
    case ARM::VLD4d32Pseudo:
4153
19
    case ARM::VLD1d64QPseudo:
4154
19
    case ARM::VLD1d64QPseudoWB_fixed:
4155
19
    case ARM::VLD4d8Pseudo_UPD:
4156
19
    case ARM::VLD4d16Pseudo_UPD:
4157
19
    case ARM::VLD4d32Pseudo_UPD:
4158
19
    case ARM::VLD4q8Pseudo_UPD:
4159
19
    case ARM::VLD4q16Pseudo_UPD:
4160
19
    case ARM::VLD4q32Pseudo_UPD:
4161
19
    case ARM::VLD4q8oddPseudo:
4162
19
    case ARM::VLD4q16oddPseudo:
4163
19
    case ARM::VLD4q32oddPseudo:
4164
19
    case ARM::VLD4q8oddPseudo_UPD:
4165
19
    case ARM::VLD4q16oddPseudo_UPD:
4166
19
    case ARM::VLD4q32oddPseudo_UPD:
4167
19
    case ARM::VLD1DUPq8:
4168
19
    case ARM::VLD1DUPq16:
4169
19
    case ARM::VLD1DUPq32:
4170
19
    case ARM::VLD1DUPq8wb_fixed:
4171
19
    case ARM::VLD1DUPq16wb_fixed:
4172
19
    case ARM::VLD1DUPq32wb_fixed:
4173
19
    case ARM::VLD1DUPq8wb_register:
4174
19
    case ARM::VLD1DUPq16wb_register:
4175
19
    case ARM::VLD1DUPq32wb_register:
4176
19
    case ARM::VLD2DUPd8:
4177
19
    case ARM::VLD2DUPd16:
4178
19
    case ARM::VLD2DUPd32:
4179
19
    case ARM::VLD2DUPd8wb_fixed:
4180
19
    case ARM::VLD2DUPd16wb_fixed:
4181
19
    case ARM::VLD2DUPd32wb_fixed:
4182
19
    case ARM::VLD2DUPd8wb_register:
4183
19
    case ARM::VLD2DUPd16wb_register:
4184
19
    case ARM::VLD2DUPd32wb_register:
4185
19
    case ARM::VLD4DUPd8Pseudo:
4186
19
    case ARM::VLD4DUPd16Pseudo:
4187
19
    case ARM::VLD4DUPd32Pseudo:
4188
19
    case ARM::VLD4DUPd8Pseudo_UPD:
4189
19
    case ARM::VLD4DUPd16Pseudo_UPD:
4190
19
    case ARM::VLD4DUPd32Pseudo_UPD:
4191
19
    case ARM::VLD1LNq8Pseudo:
4192
19
    case ARM::VLD1LNq16Pseudo:
4193
19
    case ARM::VLD1LNq32Pseudo:
4194
19
    case ARM::VLD1LNq8Pseudo_UPD:
4195
19
    case ARM::VLD1LNq16Pseudo_UPD:
4196
19
    case ARM::VLD1LNq32Pseudo_UPD:
4197
19
    case ARM::VLD2LNd8Pseudo:
4198
19
    case ARM::VLD2LNd16Pseudo:
4199
19
    case ARM::VLD2LNd32Pseudo:
4200
19
    case ARM::VLD2LNq16Pseudo:
4201
19
    case ARM::VLD2LNq32Pseudo:
4202
19
    case ARM::VLD2LNd8Pseudo_UPD:
4203
19
    case ARM::VLD2LNd16Pseudo_UPD:
4204
19
    case ARM::VLD2LNd32Pseudo_UPD:
4205
19
    case ARM::VLD2LNq16Pseudo_UPD:
4206
19
    case ARM::VLD2LNq32Pseudo_UPD:
4207
19
    case ARM::VLD4LNd8Pseudo:
4208
19
    case ARM::VLD4LNd16Pseudo:
4209
19
    case ARM::VLD4LNd32Pseudo:
4210
19
    case ARM::VLD4LNq16Pseudo:
4211
19
    case ARM::VLD4LNq32Pseudo:
4212
19
    case ARM::VLD4LNd8Pseudo_UPD:
4213
19
    case ARM::VLD4LNd16Pseudo_UPD:
4214
19
    case ARM::VLD4LNd32Pseudo_UPD:
4215
19
    case ARM::VLD4LNq16Pseudo_UPD:
4216
19
    case ARM::VLD4LNq32Pseudo_UPD:
4217
19
      // If the address is not 64-bit aligned, the latencies of these
4218
19
      // instructions increases by one.
4219
19
      ++Latency;
4220
19
      break;
4221
65.1k
    }
4222
65.1k
4223
65.1k
  return Latency;
4224
65.1k
}
4225
4226
152k
unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4227
152k
  if (
MI.isCopyLike() || 152k
MI.isInsertSubreg()152k
||
MI.isRegSequence()152k
||
4228
152k
      MI.isImplicitDef())
4229
10
    return 0;
4230
152k
4231
152k
  
if (152k
MI.isBundle()152k
)
4232
27
    return 0;
4233
152k
4234
152k
  const MCInstrDesc &MCID = MI.getDesc();
4235
152k
4236
152k
  if (
MCID.isCall() || 152k
(MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4237
152k
                        
!Subtarget.cheapPredicableCPSRDef()18.8k
)) {
4238
24.9k
    // When predicated, CPSR is an additional source operand for CPSR updating
4239
24.9k
    // instructions, this apparently increases their latencies.
4240
24.9k
    return 1;
4241
24.9k
  }
4242
127k
  return 0;
4243
127k
}
4244
4245
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4246
                                           const MachineInstr &MI,
4247
349k
                                           unsigned *PredCost) const {
4248
349k
  if (
MI.isCopyLike() || 349k
MI.isInsertSubreg()349k
||
MI.isRegSequence()349k
||
4249
349k
      MI.isImplicitDef())
4250
60
    return 1;
4251
349k
4252
349k
  // An instruction scheduler typically runs on unbundled instructions, however
4253
349k
  // other passes may query the latency of a bundled instruction.
4254
349k
  
if (349k
MI.isBundle()349k
) {
4255
8.16k
    unsigned Latency = 0;
4256
8.16k
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4257
8.16k
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4258
26.7k
    while (
++I != E && 26.7k
I->isInsideBundle()26.1k
) {
4259
18.6k
      if (I->getOpcode() != ARM::t2IT)
4260
10.6k
        Latency += getInstrLatency(ItinData, *I, PredCost);
4261
18.6k
    }
4262
8.16k
    return Latency;
4263
8.16k
  }
4264
341k
4265
341k
  const MCInstrDesc &MCID = MI.getDesc();
4266
341k
  if (
PredCost && 341k
(MCID.isCall() || 0
(MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4267
341k
                                     
!Subtarget.cheapPredicableCPSRDef()0
))) {
4268
0
    // When predicated, CPSR is an additional source operand for CPSR updating
4269
0
    // instructions, this apparently increases their latencies.
4270
0
    *PredCost = 1;
4271
0
  }
4272
341k
  // Be sure to call getStageLatency for an empty itinerary in case it has a
4273
341k
  // valid MinLatency property.
4274
341k
  if (!ItinData)
4275
0
    
return MI.mayLoad() ? 0
30
:
10
;
4276
341k
4277
341k
  unsigned Class = MCID.getSchedClass();
4278
341k
4279
341k
  // For instructions with variable uops, use uops as latency.
4280
341k
  if (
!ItinData->isEmpty() && 341k
ItinData->getNumMicroOps(Class) < 0324k
)
4281
7.99k
    return getNumMicroOps(ItinData, MI);
4282
333k
4283
333k
  // For the common case, fall back on the itinerary's latency.
4284
333k
  unsigned Latency = ItinData->getStageLatency(Class);
4285
333k
4286
333k
  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4287
333k
  unsigned DefAlign =
4288
333k
      MI.hasOneMemOperand() ? 
(*MI.memoperands_begin())->getAlignment()54.6k
:
0278k
;
4289
333k
  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4290
333k
  if (
Adj >= 0 || 333k
(int)Latency > -Adj789
) {
4291
333k
    return Latency + Adj;
4292
333k
  }
4293
31
  return Latency;
4294
31
}
4295
4296
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4297
194k
                                      SDNode *Node) const {
4298
194k
  if (!Node->isMachineOpcode())
4299
0
    return 1;
4300
194k
4301
194k
  
if (194k
!ItinData || 194k
ItinData->isEmpty()194k
)
4302
0
    return 1;
4303
194k
4304
194k
  unsigned Opcode = Node->getMachineOpcode();
4305
194k
  switch (Opcode) {
4306
194k
  default:
4307
194k
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
4308
2
  case ARM::VLDMQIA:
4309
2
  case ARM::VSTMQIA:
4310
2
    return 2;
4311
0
  }
4312
0
}
4313
4314
bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4315
                                             const MachineRegisterInfo *MRI,
4316
                                             const MachineInstr &DefMI,
4317
                                             unsigned DefIdx,
4318
                                             const MachineInstr &UseMI,
4319
230
                                             unsigned UseIdx) const {
4320
230
  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4321
230
  unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4322
230
  if (Subtarget.nonpipelinedVFP() &&
4323
25
      
(DDomain == ARMII::DomainVFP || 25
UDomain == ARMII::DomainVFP25
))
4324
0
    return true;
4325
230
4326
230
  // Hoist VFP / NEON instructions with 4 or higher latency.
4327
230
  unsigned Latency =
4328
230
      SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4329
230
  if (Latency <= 3)
4330
203
    return false;
4331
27
  
return DDomain == ARMII::DomainVFP || 27
DDomain == ARMII::DomainNEON27
||
4332
27
         
UDomain == ARMII::DomainVFP9
||
UDomain == ARMII::DomainNEON9
;
4333
230
}
4334
4335
bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4336
                                        const MachineInstr &DefMI,
4337
1.80k
                                        unsigned DefIdx) const {
4338
1.80k
  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4339
1.80k
  if (
!ItinData || 1.80k
ItinData->isEmpty()1.33k
)
4340
470
    return false;
4341
1.33k
4342
1.33k
  unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4343
1.33k
  if (
DDomain == ARMII::DomainGeneral1.33k
) {
4344
1.28k
    unsigned DefClass = DefMI.getDesc().getSchedClass();
4345
1.28k
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4346
1.27k
    return (DefCycle != -1 && DefCycle <= 2);
4347
1.28k
  }
4348
51
  return false;
4349
51
}
4350
4351
bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4352
1.20M
                                         StringRef &ErrInfo) const {
4353
1.20M
  if (
convertAddSubFlagsOpcode(MI.getOpcode())1.20M
) {
4354
0
    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4355
0
    return false;
4356
0
  }
4357
1.20M
  return true;
4358
1.20M
}
4359
4360
// LoadStackGuard has so far only been implemented for MachO. Different code
4361
// sequence is needed for other targets.
4362
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4363
                                                unsigned LoadImmOpc,
4364
146
                                                unsigned LoadOpc) const {
4365
146
  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4366
146
         "ROPI/RWPI not currently supported with stack guard");
4367
146
4368
146
  MachineBasicBlock &MBB = *MI->getParent();
4369
146
  DebugLoc DL = MI->getDebugLoc();
4370
146
  unsigned Reg = MI->getOperand(0).getReg();
4371
146
  const GlobalValue *GV =
4372
146
      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4373
146
  MachineInstrBuilder MIB;
4374
146
4375
146
  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4376
146
      .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4377
146
4378
146
  if (
Subtarget.isGVIndirectSymbol(GV)146
) {
4379
138
    MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4380
138
    MIB.addReg(Reg, RegState::Kill).addImm(0);
4381
138
    auto Flags = MachineMemOperand::MOLoad |
4382
138
                 MachineMemOperand::MODereferenceable |
4383
138
                 MachineMemOperand::MOInvariant;
4384
138
    MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4385
138
        MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4386
138
    MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4387
138
  }
4388
146
4389
146
  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4390
146
  MIB.addReg(Reg, RegState::Kill)
4391
146
     .addImm(0)
4392
146
     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
4393
146
     .add(predOps(ARMCC::AL));
4394
146
}
4395
4396
bool
4397
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4398
                                     unsigned &AddSubOpc,
4399
604
                                     bool &NegAcc, bool &HasLane) const {
4400
604
  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4401
604
  if (I == MLxEntryMap.end())
4402
588
    return false;
4403
16
4404
16
  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4405
16
  MulOpc = Entry.MulOpc;
4406
16
  AddSubOpc = Entry.AddSubOpc;
4407
16
  NegAcc = Entry.NegAcc;
4408
16
  HasLane = Entry.HasLane;
4409
16
  return true;
4410
16
}
4411
4412
//===----------------------------------------------------------------------===//
4413
// Execution domains.
4414
//===----------------------------------------------------------------------===//
4415
//
4416
// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4417
// and some can go down both.  The vmov instructions go down the VFP pipeline,
4418
// but they can be changed to vorr equivalents that are executed by the NEON
4419
// pipeline.
4420
//
4421
// We use the following execution domain numbering:
4422
//
4423
enum ARMExeDomain {
4424
  ExeGeneric = 0,
4425
  ExeVFP = 1,
4426
  ExeNEON = 2
4427
};
4428
4429
//
4430
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4431
//
4432
std::pair<uint16_t, uint16_t>
4433
292k
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4434
292k
  // If we don't have access to NEON instructions then we won't be able
4435
292k
  // to swizzle anything to the NEON domain. Check to make sure.
4436
292k
  if (
Subtarget.hasNEON()292k
) {
4437
224k
    // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4438
224k
    // if they are not predicated.
4439
224k
    if (
MI.getOpcode() == ARM::VMOVD && 224k
!isPredicated(MI)733
)
4440
733
      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4441
224k
4442
224k
    // CortexA9 is particularly picky about mixing the two and wants these
4443
224k
    // converted.
4444
224k
    
if (224k
Subtarget.useNEONForFPMovs() && 224k
!isPredicated(MI)1.55k
&&
4445
1.52k
        
(MI.getOpcode() == ARM::VMOVRS || 1.52k
MI.getOpcode() == ARM::VMOVSR1.46k
||
4446
1.38k
         MI.getOpcode() == ARM::VMOVS))
4447
150
      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4448
291k
  }
4449
291k
  // No other instructions can be swizzled, so just determine their domain.
4450
291k
  unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4451
291k
4452
291k
  if (Domain & ARMII::DomainNEON)
4453
19.1k
    return std::make_pair(ExeNEON, 0);
4454
272k
4455
272k
  // Certain instructions can go either way on Cortex-A8.
4456
272k
  // Treat them as NEON instructions.
4457
272k
  
if (272k
(Domain & ARMII::DomainNEONA8) && 272k
Subtarget.isCortexA8()1.34k
)
4458
124
    return std::make_pair(ExeNEON, 0);
4459
272k
4460
272k
  
if (272k
Domain & ARMII::DomainVFP272k
)
4461
6.54k
    return std::make_pair(ExeVFP, 0);
4462
265k
4463
265k
  return std::make_pair(ExeGeneric, 0);
4464
265k
}
4465
4466
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4467
71
                                            unsigned SReg, unsigned &Lane) {
4468
71
  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4469
71
  Lane = 0;
4470
71
4471
71
  if (DReg != ARM::NoRegister)
4472
46
   return DReg;
4473
25
4474
25
  Lane = 1;
4475
25
  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4476
25
4477
25
  assert(DReg && "S-register with no D super-register?");
4478
25
  return DReg;
4479
25
}
4480
4481
/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4482
/// set ImplicitSReg to a register number that must be marked as implicit-use or
4483
/// zero if no register needs to be defined as implicit-use.
4484
///
4485
/// If the function cannot determine if an SPR should be marked implicit use or
4486
/// not, it returns false.
4487
///
4488
/// This function handles cases where an instruction is being modified from taking
4489
/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4490
/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4491
/// lane of the DPR).
4492
///
4493
/// If the other SPR is defined, an implicit-use of it should be added. Else,
4494
/// (including the case where the DPR itself is defined), it should not.
4495
///
4496
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4497
                                       MachineInstr &MI, unsigned DReg,
4498
22
                                       unsigned Lane, unsigned &ImplicitSReg) {
4499
22
  // If the DPR is defined or used already, the other SPR lane will be chained
4500
22
  // correctly, so there is nothing to be done.
4501
22
  if (
MI.definesRegister(DReg, TRI) || 22
MI.readsRegister(DReg, TRI)11
) {
4502
13
    ImplicitSReg = 0;
4503
13
    return true;
4504
13
  }
4505
9
4506
9
  // Otherwise we need to go searching to see if the SPR is set explicitly.
4507
9
  ImplicitSReg = TRI->getSubReg(DReg,
4508
9
                                (Lane & 1) ? 
ARM::ssub_02
:
ARM::ssub_17
);
4509
9
  MachineBasicBlock::LivenessQueryResult LQR =
4510
9
      MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4511
9
4512
9
  if (LQR == MachineBasicBlock::LQR_Live)
4513
0
    return true;
4514
9
  else 
if (9
LQR == MachineBasicBlock::LQR_Unknown9
)
4515
0
    return false;
4516
9
4517
9
  // If the register is known not to be live, there is no need to add an
4518
9
  // implicit-use.
4519
9
  ImplicitSReg = 0;
4520
9
  return true;
4521
9
}
4522
4523
void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4524
883
                                          unsigned Domain) const {
4525
883
  unsigned DstReg, SrcReg, DReg;
4526
883
  unsigned Lane;
4527
883
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4528
883
  const TargetRegisterInfo *TRI = &getRegisterInfo();
4529
883
  switch (MI.getOpcode()) {
4530
0
  default:
4531
0
    llvm_unreachable("cannot handle opcode!");
4532
0
    break;
4533
733
  case ARM::VMOVD:
4534
733
    if (Domain != ExeNEON)
4535
297
      break;
4536
436
4537
436
    // Zap the predicate operands.
4538
733
    assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4539
436
4540
436
    // Make sure we've got NEON instructions.
4541
436
    assert(Subtarget.hasNEON() && "VORRd requires NEON");
4542
436
4543
436
    // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4544
436
    DstReg = MI.getOperand(0).getReg();
4545
436
    SrcReg = MI.getOperand(1).getReg();
4546
436
4547
2.18k
    for (unsigned i = MI.getDesc().getNumOperands(); 
i2.18k
;
--i1.74k
)
4548
1.74k
      MI.RemoveOperand(i - 1);
4549
436
4550
436
    // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4551
436
    MI.setDesc(get(ARM::VORRd));
4552
436
    MIB.addReg(DstReg, RegState::Define)
4553
436
        .addReg(SrcReg)
4554
436
        .addReg(SrcReg)
4555
436
        .add(predOps(ARMCC::AL));
4556
436
    break;
4557
58
  case ARM::VMOVRS:
4558
58
    if (Domain != ExeNEON)
4559
21
      break;
4560
58
    assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4561
37
4562
37
    // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4563
37
    DstReg = MI.getOperand(0).getReg();
4564
37
    SrcReg = MI.getOperand(1).getReg();
4565
37
4566
185
    for (unsigned i = MI.getDesc().getNumOperands(); 
i185
;
--i148
)
4567
148
      MI.RemoveOperand(i - 1);
4568
37
4569
37
    DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4570
37
4571
37
    // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4572
37
    // Note that DSrc has been widened and the other lane may be undef, which
4573
37
    // contaminates the entire register.
4574
37
    MI.setDesc(get(ARM::VGETLNi32));
4575
37
    MIB.addReg(DstReg, RegState::Define)
4576
37
        .addReg(DReg, RegState::Undef)
4577
37
        .addImm(Lane)
4578
37
        .add(predOps(ARMCC::AL));
4579
37
4580
37
    // The old source should be an implicit use, otherwise we might think it
4581
37
    // was dead before here.
4582
37
    MIB.addReg(SrcReg, RegState::Implicit);
4583
37
    break;
4584
77
  case ARM::VMOVSR: {
4585
77
    if (Domain != ExeNEON)
4586
67
      break;
4587
77
    assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4588
10
4589
10
    // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4590
10
    DstReg = MI.getOperand(0).getReg();
4591
10
    SrcReg = MI.getOperand(1).getReg();
4592
10
4593
10
    DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4594
10
4595
10
    unsigned ImplicitSReg;
4596
10
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4597
0
      break;
4598
10
4599
50
    
for (unsigned i = MI.getDesc().getNumOperands(); 10
i50
;
--i40
)
4600
40
      MI.RemoveOperand(i - 1);
4601
10
4602
10
    // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4603
10
    // Again DDst may be undefined at the beginning of this instruction.
4604
10
    MI.setDesc(get(ARM::VSETLNi32));
4605
10
    MIB.addReg(DReg, RegState::Define)
4606
10
        .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4607
10
        .addReg(SrcReg)
4608
10
        .addImm(Lane)
4609
10
        .add(predOps(ARMCC::AL));
4610
10
4611
10
    // The narrower destination must be marked as set to keep previous chains
4612
10
    // in place.
4613
10
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4614
10
    if (ImplicitSReg != 0)
4615
0
      MIB.addReg(ImplicitSReg, RegState::Implicit);
4616
10
    break;
4617
10
    }
4618
15
    case ARM::VMOVS: {
4619
15
      if (Domain != ExeNEON)
4620
3
        break;
4621
12
4622
12
      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4623
12
      DstReg = MI.getOperand(0).getReg();
4624
12
      SrcReg = MI.getOperand(1).getReg();
4625
12
4626
12
      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4627
12
      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4628
12
      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4629
12
4630
12
      unsigned ImplicitSReg;
4631
12
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4632
0
        break;
4633
12
4634
60
      
for (unsigned i = MI.getDesc().getNumOperands(); 12
i60
;
--i48
)
4635
48
        MI.RemoveOperand(i - 1);
4636
12
4637
12
      if (
DSrc == DDst12
) {
4638
1
        // Destination can be:
4639
1
        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4640
1
        MI.setDesc(get(ARM::VDUPLN32d));
4641
1
        MIB.addReg(DDst, RegState::Define)
4642
1
            .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4643
1
            .addImm(SrcLane)
4644
1
            .add(predOps(ARMCC::AL));
4645
1
4646
1
        // Neither the source or the destination are naturally represented any
4647
1
        // more, so add them in manually.
4648
1
        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4649
1
        MIB.addReg(SrcReg, RegState::Implicit);
4650
1
        if (ImplicitSReg != 0)
4651
0
          MIB.addReg(ImplicitSReg, RegState::Implicit);
4652
1
        break;
4653
1
      }
4654
11
4655
11
      // In general there's no single instruction that can perform an S <-> S
4656
11
      // move in NEON space, but a pair of VEXT instructions *can* do the
4657
11
      // job. It turns out that the VEXTs needed will only use DSrc once, with
4658
11
      // the position based purely on the combination of lane-0 and lane-1
4659
11
      // involved. For example
4660
11
      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
4661
11
      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
4662
11
      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
4663
11
      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
4664
11
      //
4665
11
      // Pattern of the MachineInstrs is:
4666
11
      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4667
11
      MachineInstrBuilder NewMIB;
4668
11
      NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4669
11
                       DDst);
4670
11
4671
11
      // On the first instruction, both DSrc and DDst may be <undef> if present.
4672
11
      // Specifically when the original instruction didn't have them as an
4673
11
      // <imp-use>.
4674
11
      unsigned CurReg = SrcLane == 1 && 
DstLane == 12
?
DSrc1
:
DDst10
;
4675
11
      bool CurUndef = !MI.readsRegister(CurReg, TRI);
4676
11
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4677
11
4678
11
      CurReg = SrcLane == 0 && 
DstLane == 09
?
DSrc6
:
DDst5
;
4679
11
      CurUndef = !MI.readsRegister(CurReg, TRI);
4680
11
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4681
11
            .addImm(1)
4682
11
            .add(predOps(ARMCC::AL));
4683
11
4684
11
      if (SrcLane == DstLane)
4685
7
        NewMIB.addReg(SrcReg, RegState::Implicit);
4686
11
4687
11
      MI.setDesc(get(ARM::VEXTd32));
4688
11
      MIB.addReg(DDst, RegState::Define);
4689
11
4690
11
      // On the second instruction, DDst has definitely been defined above, so
4691
11
      // it is not <undef>. DSrc, if present, can be <undef> as above.
4692
11
      CurReg = SrcLane == 1 && 
DstLane == 02
?
DSrc1
:
DDst10
;
4693
1
      CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4694
11
      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4695
11
4696
11
      CurReg = SrcLane == 0 && 
DstLane == 19
?
DSrc3
:
DDst8
;
4697
3
      CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4698
11
      MIB.addReg(CurReg, getUndefRegState(CurUndef))
4699
11
         .addImm(1)
4700
11
         .add(predOps(ARMCC::AL));
4701
11
4702
11
      if (SrcLane != DstLane)
4703
4
        MIB.addReg(SrcReg, RegState::Implicit);
4704
11
4705
11
      // As before, the original destination is no longer represented, add it
4706
11
      // implicitly.
4707
11
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4708
11
      if (ImplicitSReg != 0)
4709
0
        MIB.addReg(ImplicitSReg, RegState::Implicit);
4710
733
      break;
4711
733
    }
4712
883
  }
4713
883
}
4714
4715
//===----------------------------------------------------------------------===//
4716
// Partial register updates
4717
//===----------------------------------------------------------------------===//
4718
//
4719
// Swift renames NEON registers with 64-bit granularity.  That means any
4720
// instruction writing an S-reg implicitly reads the containing D-reg.  The
4721
// problem is mostly avoided by translating f32 operations to v2f32 operations
4722
// on D-registers, but f32 loads are still a problem.
4723
//
4724
// These instructions can load an f32 into a NEON register:
4725
//
4726
// VLDRS - Only writes S, partial D update.
4727
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4728
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4729
//
4730
// FCONSTD can be used as a dependency-breaking instruction.
4731
unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4732
    const MachineInstr &MI, unsigned OpNum,
4733
29.0k
    const TargetRegisterInfo *TRI) const {
4734
29.0k
  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4735
29.0k
  if (!PartialUpdateClearance)
4736
26.1k
    return 0;
4737
2.87k
4738
29.0k
  assert(TRI && "Need TRI instance");
4739
2.87k
4740
2.87k
  const MachineOperand &MO = MI.getOperand(OpNum);
4741
2.87k
  if (MO.readsReg())
4742
0
    return 0;
4743
2.87k
  unsigned Reg = MO.getReg();
4744
2.87k
  int UseOp = -1;
4745
2.87k
4746
2.87k
  switch (MI.getOpcode()) {
4747
2.87k
  // Normal instructions writing only an S-register.
4748
215
  case ARM::VLDRS:
4749
215
  case ARM::FCONSTS:
4750
215
  case ARM::VMOVSR:
4751
215
  case ARM::VMOVv8i8:
4752
215
  case ARM::VMOVv4i16:
4753
215
  case ARM::VMOVv2i32:
4754
215
  case ARM::VMOVv2f32:
4755
215
  case ARM::VMOVv1i64:
4756
215
    UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4757
215
    break;
4758
215
4759
215
    // Explicitly reads the dependency.
4760
20
  case ARM::VLD1LNd32:
4761
20
    UseOp = 3;
4762
20
    break;
4763
2.63k
  default:
4764
2.63k
    return 0;
4765
235
  }
4766
235
4767
235
  // If this instruction actually reads a value from Reg, there is no unwanted
4768
235
  // dependency.
4769
235
  
if (235
UseOp != -1 && 235
MI.getOperand(UseOp).readsReg()28
)
4770
9
    return 0;
4771
226
4772
226
  // We must be able to clobber the whole D-reg.
4773
226
  
if (226
TargetRegisterInfo::isVirtualRegister(Reg)226
) {
4774
0
    // Virtual register must be a foo:ssub_0<def,undef> operand.
4775
0
    if (
!MO.getSubReg() || 0
MI.readsVirtualRegister(Reg)0
)
4776
0
      return 0;
4777
226
  } else 
if (226
ARM::SPRRegClass.contains(Reg)226
) {
4778
37
    // Physical register: MI must define the full D-reg.
4779
37
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4780
37
                                             &ARM::DPRRegClass);
4781
37
    if (
!DReg || 37
!MI.definesRegister(DReg, TRI)32
)
4782
22
      return 0;
4783
204
  }
4784
204
4785
204
  // MI has an unwanted D-register dependency.
4786
204
  // Avoid defs in the previous N instructrions.
4787
204
  return PartialUpdateClearance;
4788
204
}
4789
4790
// Break a partial register dependency after getPartialRegUpdateClearance
4791
// returned non-zero.
4792
void ARMBaseInstrInfo::breakPartialRegDependency(
4793
68
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4794
68
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4795
68
  assert(TRI && "Need TRI instance");
4796
68
4797
68
  const MachineOperand &MO = MI.getOperand(OpNum);
4798
68
  unsigned Reg = MO.getReg();
4799
68
  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4800
68
         "Can't break virtual register dependencies.");
4801
68
  unsigned DReg = Reg;
4802
68
4803
68
  // If MI defines an S-reg, find the corresponding D super-register.
4804
68
  if (
ARM::SPRRegClass.contains(Reg)68
) {
4805
0
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4806
0
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4807
0
  }
4808
68
4809
68
  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4810
68
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4811
68
4812
68
  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4813
68
  // the full D-register by loading the same value to both lanes.  The
4814
68
  // instruction is micro-coded with 2 uops, so don't do this until we can
4815
68
  // properly schedule micro-coded instructions.  The dispatcher stalls cause
4816
68
  // too big regressions.
4817
68
4818
68
  // Insert the dependency-breaking FCONSTD before MI.
4819
68
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4820
68
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4821
68
      .addImm(96)
4822
68
      .add(predOps(ARMCC::AL));
4823
68
  MI.addRegisterKilled(DReg, TRI, true);
4824
68
}
4825
4826
20
bool ARMBaseInstrInfo::hasNOP() const {
4827
20
  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4828
20
}
4829
4830
8.79k
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4831
8.79k
  if (MI->getNumOperands() < 4)
4832
0
    return true;
4833
8.79k
  unsigned ShOpVal = MI->getOperand(3).getImm();
4834
8.79k
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4835
8.79k
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4836
8.79k
  if (
(ShImm == 1 && 8.79k
ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr1.39k
) ||
4837
8.77k
      
((ShImm == 1 || 8.77k
ShImm == 27.39k
) &&
4838
4.70k
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4839
4.22k
    return true;
4840
4.56k
4841
4.56k
  return false;
4842
4.56k
}
4843
4844
bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
4845
    const MachineInstr &MI, unsigned DefIdx,
4846
2.25k
    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
4847
2.25k
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4848
2.25k
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
4849
2.25k
4850
2.25k
  switch (MI.getOpcode()) {
4851
2.25k
  case ARM::VMOVDRR:
4852
2.25k
    // dX = VMOVDRR rY, rZ
4853
2.25k
    // is the same as:
4854
2.25k
    // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
4855
2.25k
    // Populate the InputRegs accordingly.
4856
2.25k
    // rY
4857
2.25k
    const MachineOperand *MOReg = &MI.getOperand(1);
4858
2.25k
    InputRegs.push_back(
4859
2.25k
        RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
4860
2.25k
    // rZ
4861
2.25k
    MOReg = &MI.getOperand(2);
4862
2.25k
    InputRegs.push_back(
4863
2.25k
        RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
4864
2.25k
    return true;
4865
0
  }
4866
0
  
llvm_unreachable0
("Target dependent opcode missing");
4867
0
}
4868
4869
bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
4870
    const MachineInstr &MI, unsigned DefIdx,
4871
3.59k
    RegSubRegPairAndIdx &InputReg) const {
4872
3.59k
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4873
3.59k
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
4874
3.59k
4875
3.59k
  switch (MI.getOpcode()) {
4876
3.59k
  case ARM::VMOVRRD:
4877
3.59k
    // rX, rY = VMOVRRD dZ
4878
3.59k
    // is the same as:
4879
3.59k
    // rX = EXTRACT_SUBREG dZ, ssub_0
4880
3.59k
    // rY = EXTRACT_SUBREG dZ, ssub_1
4881
3.59k
    const MachineOperand &MOReg = MI.getOperand(2);
4882
3.59k
    InputReg.Reg = MOReg.getReg();
4883
3.59k
    InputReg.SubReg = MOReg.getSubReg();
4884
3.59k
    InputReg.SubIdx = DefIdx == 0 ? 
ARM::ssub_03.36k
:
ARM::ssub_1229
;
4885
3.59k
    return true;
4886
0
  }
4887
0
  
llvm_unreachable0
("Target dependent opcode missing");
4888
0
}
4889
4890
bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
4891
    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
4892
207
    RegSubRegPairAndIdx &InsertedReg) const {
4893
207
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
4894
207
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
4895
207
4896
207
  switch (MI.getOpcode()) {
4897
207
  case ARM::VSETLNi32:
4898
207
    // dX = VSETLNi32 dY, rZ, imm
4899
207
    const MachineOperand &MOBaseReg = MI.getOperand(1);
4900
207
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
4901
207
    const MachineOperand &MOIndex = MI.getOperand(3);
4902
207
    BaseReg.Reg = MOBaseReg.getReg();
4903
207
    BaseReg.SubReg = MOBaseReg.getSubReg();
4904
207
4905
207
    InsertedReg.Reg = MOInsertedReg.getReg();
4906
207
    InsertedReg.SubReg = MOInsertedReg.getSubReg();
4907
207
    InsertedReg.SubIdx = MOIndex.getImm() == 0 ? 
ARM::ssub_0107
:
ARM::ssub_1100
;
4908
207
    return true;
4909
0
  }
4910
0
  
llvm_unreachable0
("Target dependent opcode missing");
4911
0
}