Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
10
#include "ARM.h"
11
#include "ARMBaseInstrInfo.h"
12
#include "ARMSubtarget.h"
13
#include "MCTargetDesc/ARMBaseInfo.h"
14
#include "Thumb2InstrInfo.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/PostOrderIterator.h"
17
#include "llvm/ADT/STLExtras.h"
18
#include "llvm/ADT/SmallSet.h"
19
#include "llvm/ADT/SmallVector.h"
20
#include "llvm/ADT/Statistic.h"
21
#include "llvm/ADT/StringRef.h"
22
#include "llvm/CodeGen/MachineBasicBlock.h"
23
#include "llvm/CodeGen/MachineFunction.h"
24
#include "llvm/CodeGen/MachineFunctionPass.h"
25
#include "llvm/CodeGen/MachineInstr.h"
26
#include "llvm/CodeGen/MachineInstrBuilder.h"
27
#include "llvm/CodeGen/MachineOperand.h"
28
#include "llvm/IR/DebugLoc.h"
29
#include "llvm/IR/Function.h"
30
#include "llvm/MC/MCInstrDesc.h"
31
#include "llvm/MC/MCRegisterInfo.h"
32
#include "llvm/Support/CommandLine.h"
33
#include "llvm/Support/Compiler.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/raw_ostream.h"
37
#include "llvm/Target/TargetInstrInfo.h"
38
#include <algorithm>
39
#include <cassert>
40
#include <cstdint>
41
#include <functional>
42
#include <iterator>
43
#include <utility>
44
45
using namespace llvm;
46
47
#define DEBUG_TYPE "t2-reduce-size"
48
49
STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
50
STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
51
STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
52
53
static cl::opt<int> ReduceLimit("t2-reduce-limit",
54
                                cl::init(-1), cl::Hidden);
55
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
56
                                     cl::init(-1), cl::Hidden);
57
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
58
                                     cl::init(-1), cl::Hidden);
59
60
namespace {
61
62
  /// ReduceTable - A static table with information on mapping from wide
63
  /// opcodes to narrow
64
  struct ReduceEntry {
65
    uint16_t WideOpc;      // Wide opcode
66
    uint16_t NarrowOpc1;   // Narrow opcode to transform to
67
    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
68
    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
69
    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
70
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
71
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
72
    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
73
                           // 1 - No cc field.
74
                           // 2 - Always set CPSR.
75
    unsigned PredCC2  : 2;
76
    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
77
    unsigned Special  : 1; // Needs to be dealt with specially
78
    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
79
  };
80
81
  static const ReduceEntry ReduceTable[] = {
82
  // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
83
  { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
84
  { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
85
  { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
86
  { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
87
  { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,   1,   0,  2,0, 0,1,0 },
88
  { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
89
  { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
90
  { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
91
  { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
92
  //FIXME: Disable CMN, as CCodes are backwards from compare expectations
93
  //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
94
  { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
95
  { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,   1,   0,  2,0, 0,0,0 },
96
  { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,   0,   0,  2,0, 0,1,0 },
97
  { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
98
  // FIXME: adr.n immediate offset must be multiple of 4.
99
  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,   1,   0,  1,0, 0,0,0 },
100
  { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
101
  { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
102
  { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
103
  { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
104
  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,0,0 },
105
  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,1,0 },
106
  // FIXME: Do we need the 16-bit 'S' variant?
107
  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
108
  { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
109
  { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,   1,   0,  0,0, 0,0,0 },
110
  { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
111
  { ARM::t2REV,   ARM::tREV,    0,             0,   0,   1,   0,  1,0, 0,0,0 },
112
  { ARM::t2REV16, ARM::tREV16,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
113
  { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
114
  { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
115
  { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
116
  { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
117
  { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
118
  { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
119
  { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,   1,   0,  0,0, 0,0,0 },
120
  { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
121
  { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
122
  { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
123
  { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
124
  { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
125
  { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
126
  { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
127
128
  // FIXME: Clean this up after splitting each Thumb load / store opcode
129
  // into multiple ones.
130
  { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
131
  { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
132
  { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
133
  { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
134
  { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
135
  { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
136
  { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
137
  { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
138
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
139
  { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
140
  { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
141
  { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
142
  { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
143
  { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
144
  { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
145
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
146
147
  { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,   1,   1,  1,1, 0,1,0 },
148
  { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
149
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,   1,   1,  1,1, 0,1,0 },
150
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
151
  // tSTMIA_UPD is a change in semantics which can only be used if the base
152
  // register is killed. This difference is correctly handled elsewhere.
153
  { ARM::t2STMIA, ARM::tSTMIA_UPD, 0,          0,   0,   1,   1,  1,1, 0,1,0 },
154
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,   1,   1,  1,1, 0,1,0 },
155
  { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
156
  };
157
158
  class Thumb2SizeReduce : public MachineFunctionPass {
159
  public:
160
    static char ID;
161
162
    const Thumb2InstrInfo *TII;
163
    const ARMSubtarget *STI;
164
165
    Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
166
167
    bool runOnMachineFunction(MachineFunction &MF) override;
168
169
8.50k
    MachineFunctionProperties getRequiredProperties() const override {
170
8.50k
      return MachineFunctionProperties().set(
171
8.50k
          MachineFunctionProperties::Property::NoVRegs);
172
8.50k
    }
173
174
8.50k
    StringRef getPassName() const override {
175
8.50k
      return "Thumb2 instruction size reduction pass";
176
8.50k
    }
177
178
  private:
179
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
180
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;
181
182
    bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
183
184
    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
185
                         bool is2Addr, ARMCC::CondCodes Pred,
186
                         bool LiveCPSR, bool &HasCC, bool &CCDead);
187
188
    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
189
                         const ReduceEntry &Entry);
190
191
    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
192
                       const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
193
194
    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
195
    /// instruction.
196
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
197
                       const ReduceEntry &Entry, bool LiveCPSR,
198
                       bool IsSelfLoop);
199
200
    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
201
    /// non-two-address instruction.
202
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
203
                        const ReduceEntry &Entry, bool LiveCPSR,
204
                        bool IsSelfLoop);
205
206
    /// ReduceMI - Attempt to reduce MI, return true on success.
207
    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
208
                  bool LiveCPSR, bool IsSelfLoop);
209
210
    /// ReduceMBB - Reduce width of instructions in the specified basic block.
211
    bool ReduceMBB(MachineBasicBlock &MBB);
212
213
    bool OptimizeSize;
214
    bool MinimizeSize;
215
216
    // Last instruction to define CPSR in the current block.
217
    MachineInstr *CPSRDef;
218
    // Was CPSR last defined by a high latency instruction?
219
    // When CPSRDef is null, this refers to CPSR defs in predecessors.
220
    bool HighLatencyCPSR;
221
222
    struct MBBInfo {
223
      // The flags leaving this block have high latency.
224
      bool HighLatencyCPSR = false;
225
      // Has this block been visited yet?
226
      bool Visited = false;
227
228
52.9k
      MBBInfo() = default;
229
    };
230
231
    SmallVector<MBBInfo, 8> BlockInfo;
232
233
    std::function<bool(const Function &)> PredicateFtor;
234
  };
235
236
  char Thumb2SizeReduce::ID = 0;
237
238
} // end anonymous namespace
239
240
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
241
8.51k
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
242
8.51k
  OptimizeSize = MinimizeSize = false;
243
527k
  for (unsigned i = 0, e = array_lengthof(ReduceTable); 
i != e527k
;
++i519k
) {
244
519k
    unsigned FromOpc = ReduceTable[i].WideOpc;
245
519k
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
246
0
      llvm_unreachable("Duplicated entries?");
247
519k
  }
248
8.51k
}
249
250
17.8k
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
251
17.8k
  for (const MCPhysReg *Regs = MCID.getImplicitDefs(); 
*Regs17.8k
;
++Regs0
)
252
17.8k
    
if (17.8k
*Regs == ARM::CPSR17.8k
)
253
17.8k
      return true;
254
0
  return false;
255
17.8k
}
256
257
// Check for a likely high-latency flag def.
258
52.3k
static bool isHighLatencyCPSR(MachineInstr *Def) {
259
52.3k
  switch(Def->getOpcode()) {
260
1.43k
  case ARM::FMSTAT:
261
1.43k
  case ARM::tMUL:
262
1.43k
    return true;
263
50.9k
  }
264
50.9k
  return false;
265
50.9k
}
266
267
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
268
/// the 's' 16-bit instruction partially update CPSR. Abort the
269
/// transformation to avoid adding false dependency on last CPSR setting
270
/// instruction which hurts the ability for out-of-order execution engine
271
/// to do register renaming magic.
272
/// This function checks if there is a read-of-write dependency between the
273
/// last instruction that defines the CPSR and the current instruction. If there
274
/// is, then there is no harm done since the instruction cannot be retired
275
/// before the CPSR setting instruction anyway.
276
/// Note, we are not doing full dependency analysis here for the sake of compile
277
/// time. We're not looking for cases like:
278
/// r0 = muls ...
279
/// r1 = add.w r0, ...
280
/// ...
281
///    = mul.w r1
282
/// In this case it would have been ok to narrow the mul.w to muls since there
283
/// are indirect RAW dependency between the muls and the mul.w
284
bool
285
18.5k
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
286
18.5k
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
287
18.5k
  if (
MinimizeSize || 18.5k
!STI->avoidCPSRPartialUpdate()15.2k
)
288
14.0k
    return false;
289
4.48k
290
4.48k
  
if (4.48k
!CPSRDef4.48k
)
291
4.48k
    // If this BB loops back to itself, conservatively avoid narrowing the
292
4.48k
    // first instruction that does partial flag update.
293
3.09k
    
return HighLatencyCPSR || 3.09k
FirstInSelfLoop3.06k
;
294
1.39k
295
1.39k
  SmallSet<unsigned, 2> Defs;
296
7.21k
  for (const MachineOperand &MO : CPSRDef->operands()) {
297
7.21k
    if (
!MO.isReg() || 7.21k
MO.isUndef()4.60k
||
MO.isUse()4.60k
)
298
4.56k
      continue;
299
2.65k
    unsigned Reg = MO.getReg();
300
2.65k
    if (
Reg == 0 || 2.65k
Reg == ARM::CPSR2.65k
)
301
1.39k
      continue;
302
1.26k
    Defs.insert(Reg);
303
1.26k
  }
304
1.39k
305
6.80k
  for (const MachineOperand &MO : Use->operands()) {
306
6.80k
    if (
!MO.isReg() || 6.80k
MO.isUndef()4.23k
||
MO.isDef()4.23k
)
307
3.97k
      continue;
308
2.83k
    unsigned Reg = MO.getReg();
309
2.83k
    if (Defs.count(Reg))
310
76
      return false;
311
1.31k
  }
312
1.31k
313
1.31k
  // If the current CPSR has high latency, try to avoid the false dependency.
314
1.31k
  
if (1.31k
HighLatencyCPSR1.31k
)
315
7
    return true;
316
1.30k
317
1.30k
  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
318
1.30k
  // of them, so always shrink them when CPSR doesn't have high latency.
319
1.30k
  
if (1.30k
Use->getOpcode() == ARM::t2MOVi ||
320
74
      Use->getOpcode() == ARM::t2MOVi16)
321
1.25k
    return false;
322
57
323
57
  // No read-after-write dependency. The narrowing will add false dependency.
324
57
  return true;
325
57
}
326
327
bool
328
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
329
                                  bool is2Addr, ARMCC::CondCodes Pred,
330
50.7k
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
331
50.7k
  if (
(is2Addr && 50.7k
Entry.PredCC2 == 07.87k
) ||
332
50.7k
      
(!is2Addr && 44.5k
Entry.PredCC1 == 042.8k
)) {
333
29.8k
    if (
Pred == ARMCC::AL29.8k
) {
334
27.0k
      // Not predicated, must set CPSR.
335
27.0k
      if (
!HasCC27.0k
) {
336
24.2k
        // Original instruction was not setting CPSR, but CPSR is not
337
24.2k
        // currently live anyway. It's ok to set it. The CPSR def is
338
24.2k
        // dead though.
339
24.2k
        if (
!LiveCPSR24.2k
) {
340
22.6k
          HasCC = true;
341
22.6k
          CCDead = true;
342
22.6k
          return true;
343
22.6k
        }
344
1.54k
        return false;
345
1.54k
      }
346
0
    } else {
347
2.76k
      // Predicated, must not set CPSR.
348
2.76k
      if (HasCC)
349
12
        return false;
350
50.7k
    }
351
20.8k
  } else 
if (20.8k
(is2Addr && 20.8k
Entry.PredCC2 == 21.71k
) ||
352
20.8k
             
(!is2Addr && 20.8k
Entry.PredCC1 == 219.1k
)) {
353
17.8k
    /// Old opcode has an optional def of CPSR.
354
17.8k
    if (HasCC)
355
0
      return true;
356
17.8k
    // If old opcode does not implicitly define CPSR, then it's not ok since
357
17.8k
    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
358
17.8k
    
if (17.8k
!HasImplicitCPSRDef(MI->getDesc())17.8k
)
359
0
      return false;
360
17.8k
    HasCC = true;
361
20.8k
  } else {
362
3.01k
    // 16-bit instruction does not set CPSR.
363
3.01k
    if (HasCC)
364
251
      return false;
365
26.2k
  }
366
26.2k
367
26.2k
  return true;
368
26.2k
}
369
370
46.7k
static bool VerifyLowRegs(MachineInstr *MI) {
371
46.7k
  unsigned Opc = MI->getOpcode();
372
41.5k
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
373
46.7k
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
374
39.8k
  bool isSPOk = isPCOk || isLROk;
375
278k
  for (unsigned i = 0, e = MI->getNumOperands(); 
i != e278k
;
++i231k
) {
376
241k
    const MachineOperand &MO = MI->getOperand(i);
377
241k
    if (
!MO.isReg() || 241k
MO.isImplicit()174k
)
378
73.2k
      continue;
379
168k
    unsigned Reg = MO.getReg();
380
168k
    if (
Reg == 0 || 168k
Reg == ARM::CPSR128k
)
381
41.0k
      continue;
382
127k
    
if (127k
isPCOk && 127k
Reg == ARM::PC37.2k
)
383
4.98k
      continue;
384
122k
    
if (122k
isLROk && 122k
Reg == ARM::LR24.2k
)
385
3.67k
      continue;
386
118k
    
if (118k
Reg == ARM::SP118k
) {
387
32.5k
      if (isSPOk)
388
22.9k
        continue;
389
9.55k
      
if (9.55k
i == 1 && 9.55k
(Opc == ARM::t2LDRi12 || 9.44k
Opc == ARM::t2STRi125.44k
))
390
9.55k
        // Special case for these ldr / str with sp as base register.
391
9.37k
        continue;
392
86.5k
    }
393
86.5k
    
if (86.5k
!isARMLowRegister(Reg)86.5k
)
394
10.1k
      return false;
395
241k
  }
396
36.5k
  return true;
397
46.7k
}
398
399
bool
400
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
401
32.1k
                                  const ReduceEntry &Entry) {
402
32.1k
  if (
ReduceLimitLdSt != -1 && 32.1k
((int)NumLdSts >= ReduceLimitLdSt)0
)
403
0
    return false;
404
32.1k
405
32.1k
  unsigned Scale = 1;
406
32.1k
  bool HasImmOffset = false;
407
32.1k
  bool HasShift = false;
408
32.1k
  bool HasOffReg = true;
409
32.1k
  bool isLdStMul = false;
410
32.1k
  unsigned Opc = Entry.NarrowOpc1;
411
32.1k
  unsigned OpNum = 3; // First 'rest' of operands.
412
32.1k
  uint8_t  ImmLimit = Entry.Imm1Limit;
413
32.1k
414
32.1k
  switch (Entry.WideOpc) {
415
0
  default:
416
0
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
417
19.6k
  case ARM::t2LDRi12:
418
19.6k
  case ARM::t2STRi12:
419
19.6k
    if (
MI->getOperand(1).getReg() == ARM::SP19.6k
) {
420
9.37k
      Opc = Entry.NarrowOpc2;
421
9.37k
      ImmLimit = Entry.Imm2Limit;
422
9.37k
    }
423
19.6k
424
19.6k
    Scale = 4;
425
19.6k
    HasImmOffset = true;
426
19.6k
    HasOffReg = false;
427
19.6k
    break;
428
1.72k
  case ARM::t2LDRBi12:
429
1.72k
  case ARM::t2STRBi12:
430
1.72k
    HasImmOffset = true;
431
1.72k
    HasOffReg = false;
432
1.72k
    break;
433
328
  case ARM::t2LDRHi12:
434
328
  case ARM::t2STRHi12:
435
328
    Scale = 2;
436
328
    HasImmOffset = true;
437
328
    HasOffReg = false;
438
328
    break;
439
1.43k
  case ARM::t2LDRs:
440
1.43k
  case ARM::t2LDRBs:
441
1.43k
  case ARM::t2LDRHs:
442
1.43k
  case ARM::t2LDRSBs:
443
1.43k
  case ARM::t2LDRSHs:
444
1.43k
  case ARM::t2STRs:
445
1.43k
  case ARM::t2STRBs:
446
1.43k
  case ARM::t2STRHs:
447
1.43k
    HasShift = true;
448
1.43k
    OpNum = 4;
449
1.43k
    break;
450
183
  case ARM::t2LDR_POST:
451
183
  case ARM::t2STR_POST: {
452
183
    if (!MBB.getParent()->getFunction()->optForMinSize())
453
175
      return false;
454
8
455
8
    
if (8
!MI->hasOneMemOperand() ||
456
8
        (*MI->memoperands_begin())->getAlignment() < 4)
457
2
      return false;
458
6
459
6
    // We're creating a completely different type of load/store - LDM from LDR.
460
6
    // For this reason we can't reuse the logic at the end of this function; we
461
6
    // have to implement the MI building here.
462
6
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
463
6
    unsigned Rt = MI->getOperand(IsStore ? 
13
:
03
).getReg();
464
6
    unsigned Rn = MI->getOperand(IsStore ? 
03
:
13
).getReg();
465
6
    unsigned Offset = MI->getOperand(3).getImm();
466
6
    unsigned PredImm = MI->getOperand(4).getImm();
467
6
    unsigned PredReg = MI->getOperand(5).getReg();
468
6
    assert(isARMLowRegister(Rt));
469
6
    assert(isARMLowRegister(Rn));
470
6
471
6
    if (Offset != 4)
472
2
      return false;
473
4
474
4
    // Add the 16-bit load / store instruction.
475
4
    DebugLoc dl = MI->getDebugLoc();
476
4
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
477
4
                   .addReg(Rn, RegState::Define)
478
4
                   .addReg(Rn)
479
4
                   .addImm(PredImm)
480
4
                   .addReg(PredReg)
481
4
                   .addReg(Rt, IsStore ? 
01
:
RegState::Define3
);
482
4
483
4
    // Transfer memoperands.
484
4
    MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
485
4
486
4
    // Transfer MI flags.
487
4
    MIB.setMIFlags(MI->getFlags());
488
4
489
4
    // Kill the old instruction.
490
4
    MI->eraseFromBundle();
491
4
    ++NumLdSts;
492
4
    return true;
493
4
  }
494
95
  case ARM::t2LDMIA: {
495
95
    unsigned BaseReg = MI->getOperand(0).getReg();
496
95
    assert(isARMLowRegister(BaseReg));
497
95
498
95
    // For the non-writeback version (this one), the base register must be
499
95
    // one of the registers being loaded.
500
95
    bool isOK = false;
501
360
    for (unsigned i = 3; 
i < MI->getNumOperands()360
;
++i265
) {
502
354
      if (
MI->getOperand(i).getReg() == BaseReg354
) {
503
89
        isOK = true;
504
89
        break;
505
89
      }
506
354
    }
507
95
508
95
    if (!isOK)
509
6
      return false;
510
89
511
89
    OpNum = 0;
512
89
    isLdStMul = true;
513
89
    break;
514
89
  }
515
47
  case ARM::t2STMIA:
516
47
    // If the base register is killed, we don't care what its value is after the
517
47
    // instruction, so we can use an updating STMIA.
518
47
    if (!MI->getOperand(0).isKill())
519
12
      return false;
520
35
521
35
    break;
522
4.98k
  case ARM::t2LDMIA_RET: {
523
4.98k
    unsigned BaseReg = MI->getOperand(1).getReg();
524
4.98k
    if (BaseReg != ARM::SP)
525
0
      return false;
526
4.98k
    Opc = Entry.NarrowOpc2; // tPOP_RET
527
4.98k
    OpNum = 2;
528
4.98k
    isLdStMul = true;
529
4.98k
    break;
530
4.98k
  }
531
3.68k
  case ARM::t2LDMIA_UPD:
532
3.68k
  case ARM::t2STMIA_UPD:
533
3.68k
  case ARM::t2STMDB_UPD: {
534
3.68k
    OpNum = 0;
535
3.68k
536
3.68k
    unsigned BaseReg = MI->getOperand(1).getReg();
537
3.68k
    if (BaseReg == ARM::SP &&
538
3.68k
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
539
3.68k
         
Entry.WideOpc == ARM::t2STMDB_UPD3.67k
)) {
540
3.68k
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
541
3.68k
      OpNum = 2;
542
3.68k
    } else 
if (4
!isARMLowRegister(BaseReg) ||
543
4
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
544
4
                
Entry.WideOpc != ARM::t2STMIA_UPD1
)) {
545
0
      return false;
546
0
    }
547
3.68k
548
3.68k
    isLdStMul = true;
549
3.68k
    break;
550
3.68k
  }
551
31.9k
  }
552
31.9k
553
31.9k
  unsigned OffsetReg = 0;
554
31.9k
  bool OffsetKill = false;
555
31.9k
  bool OffsetInternal = false;
556
31.9k
  if (
HasShift31.9k
) {
557
1.43k
    OffsetReg  = MI->getOperand(2).getReg();
558
1.43k
    OffsetKill = MI->getOperand(2).isKill();
559
1.43k
    OffsetInternal = MI->getOperand(2).isInternalRead();
560
1.43k
561
1.43k
    if (MI->getOperand(3).getImm())
562
1.43k
      // Thumb1 addressing mode doesn't support shift.
563
490
      return false;
564
31.4k
  }
565
31.4k
566
31.4k
  unsigned OffsetImm = 0;
567
31.4k
  if (
HasImmOffset31.4k
) {
568
21.6k
    OffsetImm = MI->getOperand(2).getImm();
569
21.6k
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
570
21.6k
571
21.6k
    if (
(OffsetImm & (Scale - 1)) || 21.6k
OffsetImm > MaxOffset21.6k
)
572
21.6k
      // Make sure the immediate field fits.
573
1.60k
      return false;
574
29.8k
  }
575
29.8k
576
29.8k
  // Add the 16-bit load / store instruction.
577
29.8k
  DebugLoc dl = MI->getDebugLoc();
578
29.8k
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
579
29.8k
580
29.8k
  // tSTMIA_UPD takes a defining register operand. We've already checked that
581
29.8k
  // the register is killed, so mark it as dead here.
582
29.8k
  if (Entry.WideOpc == ARM::t2STMIA)
583
35
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
584
29.8k
585
29.8k
  if (
!isLdStMul29.8k
) {
586
21.0k
    MIB.add(MI->getOperand(0));
587
21.0k
    MIB.add(MI->getOperand(1));
588
21.0k
589
21.0k
    if (HasImmOffset)
590
20.0k
      MIB.addImm(OffsetImm / Scale);
591
21.0k
592
21.0k
    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
593
21.0k
594
21.0k
    if (HasOffReg)
595
979
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
596
979
                            getInternalReadRegState(OffsetInternal));
597
21.0k
  }
598
29.8k
599
29.8k
  // Transfer the rest of operands.
600
128k
  for (unsigned e = MI->getNumOperands(); 
OpNum != e128k
;
++OpNum98.7k
)
601
98.7k
    MIB.add(MI->getOperand(OpNum));
602
29.8k
603
29.8k
  // Transfer memoperands.
604
29.8k
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
605
29.8k
606
29.8k
  // Transfer MI flags.
607
29.8k
  MIB.setMIFlags(MI->getFlags());
608
29.8k
609
29.8k
  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
610
32.1k
611
32.1k
  MBB.erase_instr(MI);
612
32.1k
  ++NumLdSts;
613
32.1k
  return true;
614
32.1k
}
615
616
bool
617
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
618
                                const ReduceEntry &Entry,
619
61.8k
                                bool LiveCPSR, bool IsSelfLoop) {
620
61.8k
  unsigned Opc = MI->getOpcode();
621
61.8k
  if (
Opc == ARM::t2ADDri61.8k
) {
622
10.9k
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
623
10.9k
    // it's a normal reduce.
624
10.9k
    if (
MI->getOperand(1).getReg() != ARM::SP10.9k
) {
625
6.11k
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
626
1.95k
        return true;
627
4.16k
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
628
4.16k
    }
629
4.81k
    // Try to reduce to tADDrSPi.
630
4.81k
    unsigned Imm = MI->getOperand(2).getImm();
631
4.81k
    // The immediate must be in range, the destination register must be a low
632
4.81k
    // reg, the predicate must be "always" and the condition flags must not
633
4.81k
    // be being set.
634
4.81k
    if (
Imm & 3 || 4.81k
Imm > 10204.81k
)
635
807
      return false;
636
4.01k
    
if (4.01k
!isARMLowRegister(MI->getOperand(0).getReg())4.01k
)
637
327
      return false;
638
3.68k
    
if (3.68k
MI->getOperand(3).getImm() != ARMCC::AL3.68k
)
639
3
      return false;
640
3.68k
    const MCInstrDesc &MCID = MI->getDesc();
641
3.68k
    if (MCID.hasOptionalDef() &&
642
3.68k
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
643
0
      return false;
644
3.68k
645
3.68k
    MachineInstrBuilder MIB =
646
3.68k
        BuildMI(MBB, MI, MI->getDebugLoc(),
647
3.68k
                TII->get(ARM::tADDrSPi))
648
3.68k
            .add(MI->getOperand(0))
649
3.68k
            .add(MI->getOperand(1))
650
3.68k
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
651
3.68k
            .add(predOps(ARMCC::AL));
652
3.68k
653
3.68k
    // Transfer MI flags.
654
3.68k
    MIB.setMIFlags(MI->getFlags());
655
3.68k
656
3.68k
    DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " <<*MIB);
657
10.9k
658
10.9k
    MBB.erase_instr(MI);
659
10.9k
    ++NumNarrows;
660
10.9k
    return true;
661
10.9k
  }
662
50.9k
663
50.9k
  
if (50.9k
Entry.LowRegs1 && 50.9k
!VerifyLowRegs(MI)46.7k
)
664
10.1k
    return false;
665
40.7k
666
40.7k
  
if (40.7k
MI->mayLoadOrStore()40.7k
)
667
32.1k
    return ReduceLoadStore(MBB, MI, Entry);
668
8.64k
669
8.64k
  switch (Opc) {
670
0
  default: break;
671
0
  case ARM::t2ADDSri:
672
0
  case ARM::t2ADDSrr: {
673
0
    unsigned PredReg = 0;
674
0
    if (
getInstrPredicate(*MI, PredReg) == ARMCC::AL0
) {
675
0
      switch (Opc) {
676
0
      default: break;
677
0
      case ARM::t2ADDSri:
678
0
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
679
0
          return true;
680
0
        
LLVM_FALLTHROUGH0
;
681
0
      case ARM::t2ADDSrr:
682
0
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
683
0
      }
684
0
    }
685
0
    break;
686
0
  }
687
1.99k
  case ARM::t2RSBri:
688
1.99k
  case ARM::t2RSBSri:
689
1.99k
  case ARM::t2SXTB:
690
1.99k
  case ARM::t2SXTH:
691
1.99k
  case ARM::t2UXTB:
692
1.99k
  case ARM::t2UXTH:
693
1.99k
    if (MI->getOperand(2).getImm() == 0)
694
986
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
695
1.01k
    break;
696
2.46k
  case ARM::t2MOVi16:
697
2.46k
    // Can convert only 'pure' immediate operands, not immediates obtained as
698
2.46k
    // globals' addresses.
699
2.46k
    if (MI->getOperand(1).isImm())
700
2.17k
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
701
291
    break;
702
4.17k
  case ARM::t2CMPrr: {
703
4.17k
    // Try to reduce to the lo-reg only version first. Why there are two
704
4.17k
    // versions of the instruction is a mystery.
705
4.17k
    // It would be nice to just have two entries in the master table that
706
4.17k
    // are prioritized, but the table assumes a unique entry for each
707
4.17k
    // source insn opcode. So for now, we hack a local entry record to use.
708
4.17k
    static const ReduceEntry NarrowEntry =
709
4.17k
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
710
4.17k
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
711
2.80k
      return true;
712
1.37k
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
713
1.37k
  }
714
1.30k
  }
715
1.30k
  return false;
716
1.30k
}
717
718
// Try to replace the 32-bit instruction MI with the 16-bit two-address
// (tied source/destination) narrow opcode given by Entry.NarrowOpc2.
// On success the narrow instruction is inserted before MI, MI is erased,
// and true is returned; on any failed precondition, false is returned and
// MI is left in place (though operands may have been commuted in place).
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  // Debugging aid: an artificial cap on how many 2-address reductions run.
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    unsigned Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      // NOTE: this mutates MI in place.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    // Operand 1 must be swapped with whichever commutable operand
    // currently holds Reg0 (the destination).
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  // The narrow encoding may only accept low registers for the tied operand.
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    // Operand 2 is an immediate; it must fit in Entry.Imm2Limit bits.
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    // Operand 2 is a register; apply the low-register restriction to it too.
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    // New opcode isn't predicable: drop the (always-AL) predicate operands
    // when copying operands below.
    SkipPred = !NewMCID.isPredicable();
  }

  // Determine whether MI sets CPSR via its optional-def operand, and
  // whether that def is dead.
  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which has the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    // The optional CPSR def was already emitted above; skip the original.
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    // Drop predicate operands when the narrow opcode isn't predicable.
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}
831
832
// Try to replace the 32-bit instruction MI with the 16-bit non-two-address
// narrow opcode given by Entry.NarrowOpc1. On success the narrow instruction
// is inserted before MI, MI is erased, and true is returned; otherwise false
// is returned and MI is untouched.
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  // Debugging aid: an artificial cap on how many narrowings run.
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  // Maximum immediate the narrow encoding accepts (Imm1Limit is a bit-width;
  // zero means no immediate restriction applies).
  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  // Pre-check every operand against the narrow encoding's constraints.
  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      // No-register and CPSR operands aren't subject to the low-reg rule.
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    // New opcode isn't predicable: drop the (always-AL) predicate operands
    // when copying operands below.
    SkipPred = !NewMCID.isPredicable();
  }

  // Determine whether MI sets CPSR via its optional-def operand, and
  // whether that def is dead.
  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which has the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    // The optional CPSR def was already emitted above; skip the original.
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
        continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  // If the source wasn't predicable but the narrow opcode is, append an
  // always-execute (AL) predicate.
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}
938
939
391k
// Scan MI for definitions of CPSR. Sets DefCPSR if MI defines CPSR at all,
// and returns the CPSR liveness after MI: true if MI has a non-dead CPSR
// def, otherwise the incoming LiveCPSR value.
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool LiveDefSeen = false;
  for (const MachineOperand &Op : MI.operands()) {
    // Only register definitions of CPSR are interesting.
    if (!Op.isReg() || Op.isUndef() || Op.isUse())
      continue;
    if (Op.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    LiveDefSeen |= !Op.isDead();
  }

  return LiveDefSeen || LiveCPSR;
}
954
955
391k
// Scan MI for uses of CPSR and return the CPSR liveness after MI: a 'kill'
// use ends the live range (returns false); otherwise liveness is unchanged.
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &Use : MI.operands()) {
    // Only register uses of CPSR are interesting.
    if (!Use.isReg() || Use.isUndef() || Use.isDef())
      continue;
    if (Use.getReg() != ARM::CPSR)
      continue;

    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (Use.isKill())
      // The kill ends CPSR's live range here.
      return false;
  }

  return LiveCPSR;
}
970
971
// Attempt to reduce the 32-bit instruction MI to a 16-bit equivalent using
// the reduction table. Returns true if MI was replaced.
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop) {
  // Look up the reduction table entry for this opcode, if any.
  auto OPI = ReduceOpcodeMap.find(MI->getOpcode());
  if (OPI == ReduceOpcodeMap.end())
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // "Special" entries get dedicated handling; don't attempt the normal
  // reductions on them for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Prefer the 16-bit two-address form when the entry provides one.
  if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Otherwise fall back to the 16-bit non-two-address form.
  return Entry.NarrowOpc1 &&
         ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
995
996
52.0k
// Walk the instructions of MBB (including those inside bundles) and attempt
// to reduce each to a 16-bit encoding, tracking CPSR liveness and the most
// recent CPSR-defining instruction along the way. Records per-block
// HighLatencyCPSR/Visited state in BlockInfo. Returns true if anything
// changed.
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    // One predecessor with a high-latency CPSR def is enough.
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  // NextMII is captured up front each iteration so erasing/replacing MI
  // doesn't invalidate the walk.
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      // Remember the BUNDLE header; its CPSR kill/def flags are consulted
      // when we finish the bundled instructions below.
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugValue())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
      Modified = true;
      // The replacement was inserted just before NextMII; re-point MI at it
      // so the CPSR bookkeeping below sees the new instruction.
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  // Publish this block's results so RPO successors can consult them.
  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}
1085
1086
32.8k
// Pass entry point: set up per-function state and reduce every reachable
// block in reverse post-order. Returns true if any instruction was changed.
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  // Honor the optional predicate that restricts which functions we touch.
  if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
    return false;

  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
  // Nothing to do on Thumb1-only targets, or when 32-bit Thumb is preferred.
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction()->optForSize();
  MinimizeSize = MF.getFunction()->optForMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  bool Modified = false;
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB);
  return Modified;
}
1112
1113
/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1114
/// reduction pass.
1115
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  // Hand ownership of the function predicate to the new pass instance.
  auto *Pass = new Thumb2SizeReduce(std::move(Ftor));
  return Pass;
}