Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains a pass that splits the constant pool up into 'islands'
11
// which are scattered through-out the function.  This is required due to the
12
// limited pc-relative displacements that ARM has.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "ARM.h"
17
#include "ARMBaseInstrInfo.h"
18
#include "ARMBasicBlockInfo.h"
19
#include "ARMMachineFunctionInfo.h"
20
#include "ARMSubtarget.h"
21
#include "MCTargetDesc/ARMBaseInfo.h"
22
#include "Thumb2InstrInfo.h"
23
#include "Utils/ARMBaseInfo.h"
24
#include "llvm/ADT/DenseMap.h"
25
#include "llvm/ADT/STLExtras.h"
26
#include "llvm/ADT/SmallSet.h"
27
#include "llvm/ADT/SmallVector.h"
28
#include "llvm/ADT/Statistic.h"
29
#include "llvm/ADT/StringRef.h"
30
#include "llvm/CodeGen/MachineBasicBlock.h"
31
#include "llvm/CodeGen/MachineConstantPool.h"
32
#include "llvm/CodeGen/MachineFunction.h"
33
#include "llvm/CodeGen/MachineFunctionPass.h"
34
#include "llvm/CodeGen/MachineInstr.h"
35
#include "llvm/CodeGen/MachineJumpTableInfo.h"
36
#include "llvm/CodeGen/MachineOperand.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/IR/DataLayout.h"
39
#include "llvm/IR/DebugLoc.h"
40
#include "llvm/MC/MCInstrDesc.h"
41
#include "llvm/Pass.h"
42
#include "llvm/Support/CommandLine.h"
43
#include "llvm/Support/Compiler.h"
44
#include "llvm/Support/Debug.h"
45
#include "llvm/Support/ErrorHandling.h"
46
#include "llvm/Support/Format.h"
47
#include "llvm/Support/MathExtras.h"
48
#include "llvm/Support/raw_ostream.h"
49
#include <algorithm>
50
#include <cassert>
51
#include <cstdint>
52
#include <iterator>
53
#include <utility>
54
#include <vector>
55
56
using namespace llvm;
57
58
#define DEBUG_TYPE "arm-cp-islands"
59
60
#define ARM_CP_ISLANDS_OPT_NAME \
61
4.38k
  "ARM constant island placement and branch shortening pass"
62
STATISTIC(NumCPEs,       "Number of constpool entries");
63
STATISTIC(NumSplit,      "Number of uncond branches inserted");
64
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");
65
STATISTIC(NumUBrFixed,   "Number of uncond branches fixed");
66
STATISTIC(NumTBs,        "Number of table branches generated");
67
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
68
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
69
STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");
70
STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");
71
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
72
73
static cl::opt<bool>
74
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
75
          cl::desc("Adjust basic block layout to better use TB[BH]"));
76
77
static cl::opt<unsigned>
78
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
79
          cl::desc("The max number of iteration for converge"));
80
81
static cl::opt<bool> SynthesizeThumb1TBB(
82
    "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
83
    cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
84
             "equivalent to the TBB/TBH instructions"));
85
86
namespace {
87
88
  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
89
  /// requires constant pool entries to be scattered among the instructions
90
  /// inside a function.  To do this, it completely ignores the normal LLVM
91
  /// constant pool; instead, it places constants wherever it feels like with
92
  /// special instructions.
93
  ///
94
  /// The terminology used in this pass includes:
95
  ///   Islands - Clumps of constants placed in the function.
96
  ///   Water   - Potential places where an island could be formed.
97
  ///   CPE     - A constant pool entry that has been placed somewhere, which
98
  ///             tracks a list of users.
99
  class ARMConstantIslands : public MachineFunctionPass {
100
    std::vector<BasicBlockInfo> BBInfo;
101
102
    /// WaterList - A sorted list of basic blocks where islands could be placed
103
    /// (i.e. blocks that don't fall through to the following block, due
104
    /// to a return, unreachable, or unconditional branch).
105
    std::vector<MachineBasicBlock*> WaterList;
106
107
    /// NewWaterList - The subset of WaterList that was created since the
108
    /// previous iteration by inserting unconditional branches.
109
    SmallSet<MachineBasicBlock*, 4> NewWaterList;
110
111
    using water_iterator = std::vector<MachineBasicBlock *>::iterator;
112
113
    /// CPUser - One user of a constant pool, keeping the machine instruction
114
    /// pointer, the constant pool being referenced, and the max displacement
115
    /// allowed from the instruction to the CP.  The HighWaterMark records the
116
    /// highest basic block where a new CPEntry can be placed.  To ensure this
117
    /// pass terminates, the CP entries are initially placed at the end of the
118
    /// function and then move monotonically to lower addresses.  The
119
    /// exception to this rule is when the current CP entry for a particular
120
    /// CPUser is out of range, but there is another CP entry for the same
121
    /// constant value in range.  We want to use the existing in-range CP
122
    /// entry, but if it later moves out of range, the search for new water
123
    /// should resume where it left off.  The HighWaterMark is used to record
124
    /// that point.
125
    struct CPUser {
126
      MachineInstr *MI;
127
      MachineInstr *CPEMI;
128
      MachineBasicBlock *HighWaterMark;
129
      unsigned MaxDisp;
130
      bool NegOk;
131
      bool IsSoImm;
132
      bool KnownAlignment = false;
133
134
      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
135
             bool neg, bool soimm)
136
5.51k
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
137
5.51k
        HighWaterMark = CPEMI->getParent();
138
5.51k
      }
139
140
      /// getMaxDisp - Returns the maximum displacement supported by MI.
141
      /// Correct for unknown alignment.
142
      /// Conservatively subtract 2 bytes to handle weird alignment effects.
143
20.6k
      unsigned getMaxDisp() const {
144
20.6k
        return (KnownAlignment ? 
MaxDisp3.28k
:
MaxDisp - 217.3k
) - 2;
145
20.6k
      }
146
    };
147
148
    /// CPUsers - Keep track of all of the machine instructions that use various
149
    /// constant pools and their max displacement.
150
    std::vector<CPUser> CPUsers;
151
152
    /// CPEntry - One per constant pool entry, keeping the machine instruction
153
    /// pointer, the constpool index, and the number of CPUser's which
154
    /// reference this entry.
155
    struct CPEntry {
156
      MachineInstr *CPEMI;
157
      unsigned CPI;
158
      unsigned RefCount;
159
160
      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
161
5.30k
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
162
    };
163
164
    /// CPEntries - Keep track of all of the constant pool entry machine
165
    /// instructions. For each original constpool index (i.e. those that existed
166
    /// upon entry to this pass), it keeps a vector of entries.  Original
167
    /// elements are cloned as we go along; the clones are put in the vector of
168
    /// the original element, but have distinct CPIs.
169
    ///
170
    /// The first half of CPEntries contains generic constants, the second half
171
    /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
172
    /// which vector it will be in here.
173
    std::vector<std::vector<CPEntry>> CPEntries;
174
175
    /// Maps a JT index to the offset in CPEntries containing copies of that
176
    /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
177
    DenseMap<int, int> JumpTableEntryIndices;
178
179
    /// Maps a JT index to the LEA that actually uses the index to calculate its
180
    /// base address.
181
    DenseMap<int, int> JumpTableUserIndices;
182
183
    /// ImmBranch - One per immediate branch, keeping the machine instruction
184
    /// pointer, conditional or unconditional, the max displacement,
185
    /// and (if isCond is true) the corresponding unconditional branch
186
    /// opcode.
187
    struct ImmBranch {
188
      MachineInstr *MI;
189
      unsigned MaxDisp : 31;
190
      bool isCond : 1;
191
      unsigned UncondBr;
192
193
      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
194
27.3k
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
195
    };
196
197
    /// ImmBranches - Keep track of all the immediate branch instructions.
198
    std::vector<ImmBranch> ImmBranches;
199
200
    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
201
    SmallVector<MachineInstr*, 4> PushPopMIs;
202
203
    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
204
    SmallVector<MachineInstr*, 4> T2JumpTables;
205
206
    /// HasFarJump - True if any far jump instruction has been emitted during
207
    /// the branch fix up pass.
208
    bool HasFarJump;
209
210
    MachineFunction *MF;
211
    MachineConstantPool *MCP;
212
    const ARMBaseInstrInfo *TII;
213
    const ARMSubtarget *STI;
214
    ARMFunctionInfo *AFI;
215
    bool isThumb;
216
    bool isThumb1;
217
    bool isThumb2;
218
    bool isPositionIndependentOrROPI;
219
220
  public:
221
    static char ID;
222
223
4.39k
    ARMConstantIslands() : MachineFunctionPass(ID) {}
224
225
    bool runOnMachineFunction(MachineFunction &MF) override;
226
227
4.38k
    MachineFunctionProperties getRequiredProperties() const override {
228
4.38k
      return MachineFunctionProperties().set(
229
4.38k
          MachineFunctionProperties::Property::NoVRegs);
230
4.38k
    }
231
232
4.38k
    StringRef getPassName() const override {
233
4.38k
      return ARM_CP_ISLANDS_OPT_NAME;
234
4.38k
    }
235
236
  private:
237
    void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
238
    void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
239
    bool BBHasFallthrough(MachineBasicBlock *MBB);
240
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
241
    unsigned getCPELogAlign(const MachineInstr *CPEMI);
242
    void scanFunctionJumpTables();
243
    void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
244
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
245
    void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
246
    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
247
    bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
248
    unsigned getCombinedIndex(const MachineInstr *CPEMI);
249
    int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
250
    bool findAvailableWater(CPUser&U, unsigned UserOffset,
251
                            water_iterator &WaterIter, bool CloserWater);
252
    void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
253
                        MachineBasicBlock *&NewMBB);
254
    bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
255
    void removeDeadCPEMI(MachineInstr *CPEMI);
256
    bool removeUnusedCPEntries();
257
    bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
258
                          MachineInstr *CPEMI, unsigned Disp, bool NegOk,
259
                          bool DoDump = false);
260
    bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
261
                        CPUser &U, unsigned &Growth);
262
    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
263
    bool fixupImmediateBr(ImmBranch &Br);
264
    bool fixupConditionalBr(ImmBranch &Br);
265
    bool fixupUnconditionalBr(ImmBranch &Br);
266
    bool undoLRSpillRestore();
267
    bool optimizeThumb2Instructions();
268
    bool optimizeThumb2Branches();
269
    bool reorderThumb2JumpTables();
270
    bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
271
                              unsigned &DeadSize, bool &CanDeleteLEA,
272
                              bool &BaseRegKill);
273
    bool optimizeThumb2JumpTables();
274
    MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
275
                                                  MachineBasicBlock *JTBB);
276
277
    unsigned getOffsetOf(MachineInstr *MI) const;
278
    unsigned getUserOffset(CPUser&) const;
279
    void dumpBBs();
280
    void verify();
281
282
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
283
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
284
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
285
12.9k
                         const CPUser &U) {
286
12.9k
      return isOffsetInRange(UserOffset, TrialOffset,
287
12.9k
                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
288
12.9k
    }
289
  };
290
291
} // end anonymous namespace
292
293
char ARMConstantIslands::ID = 0;
294
295
/// verify - check BBOffsets, BBSizes, alignment of islands
296
17.0k
void ARMConstantIslands::verify() {
297
#ifndef NDEBUG
298
  assert(std::is_sorted(MF->begin(), MF->end(),
299
                        [this](const MachineBasicBlock &LHS,
300
                               const MachineBasicBlock &RHS) {
301
                          return BBInfo[LHS.getNumber()].postOffset() <
302
                                 BBInfo[RHS.getNumber()].postOffset();
303
                        }));
304
  DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
305
  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
306
    CPUser &U = CPUsers[i];
307
    unsigned UserOffset = getUserOffset(U);
308
    // Verify offset using the real max displacement without the safety
309
    // adjustment.
310
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
311
                         /* DoDump = */ true)) {
312
      DEBUG(dbgs() << "OK\n");
313
      continue;
314
    }
315
    DEBUG(dbgs() << "Out of range.\n");
316
    dumpBBs();
317
    DEBUG(MF->dump());
318
    llvm_unreachable("Constant pool entry out of range!");
319
  }
320
#endif
321
}
322
323
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
324
/// print block size and offset information - debugging
325
LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
326
  DEBUG({
327
    for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
328
      const BasicBlockInfo &BBI = BBInfo[J];
329
      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
330
             << " kb=" << unsigned(BBI.KnownBits)
331
             << " ua=" << unsigned(BBI.Unalign)
332
             << " pa=" << unsigned(BBI.PostAlign)
333
             << format(" size=%#x\n", BBInfo[J].Size);
334
    }
335
  });
336
}
337
#endif
338
339
17.0k
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
340
17.0k
  MF = &mf;
341
17.0k
  MCP = mf.getConstantPool();
342
17.0k
343
17.0k
  DEBUG(dbgs() << "***** ARMConstantIslands: "
344
17.0k
               << MCP->getConstants().size() << " CP entries, aligned to "
345
17.0k
               << MCP->getConstantPoolAlignment() << " bytes *****\n");
346
17.0k
347
17.0k
  STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
348
17.0k
  TII = STI->getInstrInfo();
349
17.0k
  isPositionIndependentOrROPI =
350
10.3k
      STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
351
17.0k
  AFI = MF->getInfo<ARMFunctionInfo>();
352
17.0k
353
17.0k
  isThumb = AFI->isThumbFunction();
354
17.0k
  isThumb1 = AFI->isThumb1OnlyFunction();
355
17.0k
  isThumb2 = AFI->isThumb2Function();
356
17.0k
357
17.0k
  HasFarJump = false;
358
8.37k
  bool GenerateTBB = isThumb2 || 
(isThumb1 && 8.37k
SynthesizeThumb1TBB1.10k
);
359
17.0k
360
17.0k
  // This pass invalidates liveness information when it splits basic blocks.
361
17.0k
  MF->getRegInfo().invalidateLiveness();
362
17.0k
363
17.0k
  // Renumber all of the machine basic blocks in the function, guaranteeing that
364
17.0k
  // the numbers agree with the position of the block in the function.
365
17.0k
  MF->RenumberBlocks();
366
17.0k
367
17.0k
  // Try to reorder and otherwise adjust the block layout to make good use
368
17.0k
  // of the TB[BH] instructions.
369
17.0k
  bool MadeChange = false;
370
17.0k
  if (
GenerateTBB && 17.0k
AdjustJumpTableBlocks9.73k
) {
371
9.73k
    scanFunctionJumpTables();
372
9.73k
    MadeChange |= reorderThumb2JumpTables();
373
9.73k
    // Data is out of date, so clear it. It'll be re-computed later.
374
9.73k
    T2JumpTables.clear();
375
9.73k
    // Blocks may have shifted around. Keep the numbering up to date.
376
9.73k
    MF->RenumberBlocks();
377
9.73k
  }
378
17.0k
379
17.0k
  // Perform the initial placement of the constant pool entries.  To start with,
380
17.0k
  // we put them all at the end of the function.
381
17.0k
  std::vector<MachineInstr*> CPEMIs;
382
17.0k
  if (!MCP->isEmpty())
383
1.81k
    doInitialConstPlacement(CPEMIs);
384
17.0k
385
17.0k
  if (MF->getJumpTableInfo())
386
351
    doInitialJumpTablePlacement(CPEMIs);
387
17.0k
388
17.0k
  /// The next UID to take is the first unused one.
389
17.0k
  AFI->initPICLabelUId(CPEMIs.size());
390
17.0k
391
17.0k
  // Do the initial scan of the function, building up information about the
392
17.0k
  // sizes of each block, the location of all the water, and finding all of the
393
17.0k
  // constant pool users.
394
17.0k
  initializeFunctionInfo(CPEMIs);
395
17.0k
  CPEMIs.clear();
396
17.0k
  DEBUG(dumpBBs());
397
17.0k
398
17.0k
  // Functions with jump tables need an alignment of 4 because they use the ADR
399
17.0k
  // instruction, which aligns the PC to 4 bytes before adding an offset.
400
17.0k
  if (!T2JumpTables.empty())
401
326
    MF->ensureAlignment(2);
402
17.0k
403
17.0k
  /// Remove dead constant pool entries.
404
17.0k
  MadeChange |= removeUnusedCPEntries();
405
17.0k
406
17.0k
  // Iteratively place constant pool entries and fix up branches until there
407
17.0k
  // is no change.
408
17.0k
  unsigned NoCPIters = 0, NoBRIters = 0;
409
17.0k
  while (
true17.0k
) {
410
17.0k
    DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
411
17.0k
    bool CPChange = false;
412
24.6k
    for (unsigned i = 0, e = CPUsers.size(); 
i != e24.6k
;
++i7.54k
)
413
17.0k
      // For most inputs, it converges in no more than 5 iterations.
414
17.0k
      // If it doesn't end in 10, the input may have huge BB or many CPEs.
415
17.0k
      // In this case, we will try different heuristics.
416
7.54k
      CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
417
17.0k
    if (
CPChange && 17.0k
++NoCPIters > CPMaxIteration34
)
418
0
      report_fatal_error("Constant Island pass failed to converge!");
419
17.0k
    
DEBUG17.0k
(dumpBBs());
420
17.0k
421
17.0k
    // Clear NewWaterList now.  If we split a block for branches, it should
422
17.0k
    // appear as "new water" for the next iteration of constant pool placement.
423
17.0k
    NewWaterList.clear();
424
17.0k
425
17.0k
    DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
426
17.0k
    bool BRChange = false;
427
46.5k
    for (unsigned i = 0, e = ImmBranches.size(); 
i != e46.5k
;
++i29.5k
)
428
29.5k
      BRChange |= fixupImmediateBr(ImmBranches[i]);
429
17.0k
    if (
BRChange && 17.0k
++NoBRIters > 3030
)
430
0
      report_fatal_error("Branch Fix Up pass failed to converge!");
431
17.0k
    
DEBUG17.0k
(dumpBBs());
432
17.0k
433
17.0k
    if (
!CPChange && 17.0k
!BRChange17.0k
)
434
17.0k
      break;
435
59
    MadeChange = true;
436
59
  }
437
17.0k
438
17.0k
  // Shrink 32-bit Thumb2 load and store instructions.
439
17.0k
  
if (17.0k
isThumb2 && 17.0k
!STI->prefers32BitThumb()8.63k
)
440
8.63k
    MadeChange |= optimizeThumb2Instructions();
441
17.0k
442
17.0k
  // Shrink 32-bit branch instructions.
443
17.0k
  if (
isThumb && 17.0k
STI->hasV8MBaselineOps()9.73k
)
444
8.72k
    MadeChange |= optimizeThumb2Branches();
445
17.0k
446
17.0k
  // Optimize jump tables using TBB / TBH.
447
17.0k
  if (
GenerateTBB && 17.0k
!STI->genExecuteOnly()9.73k
)
448
9.65k
    MadeChange |= optimizeThumb2JumpTables();
449
17.0k
450
17.0k
  // After a while, this might be made debug-only, but it is not expensive.
451
17.0k
  verify();
452
17.0k
453
17.0k
  // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
454
17.0k
  // undo the spill / restore of LR if possible.
455
17.0k
  if (
isThumb && 17.0k
!HasFarJump9.73k
&&
AFI->isLRSpilledForFarJump()9.73k
)
456
1
    MadeChange |= undoLRSpillRestore();
457
17.0k
458
17.0k
  // Save the mapping between original and cloned constpool entries.
459
21.5k
  for (unsigned i = 0, e = CPEntries.size(); 
i != e21.5k
;
++i4.56k
) {
460
9.86k
    for (unsigned j = 0, je = CPEntries[i].size(); 
j != je9.86k
;
++j5.30k
) {
461
5.30k
      const CPEntry & CPE = CPEntries[i][j];
462
5.30k
      if (
CPE.CPEMI && 5.30k
CPE.CPEMI->getOperand(1).isCPI()4.59k
)
463
4.20k
        AFI->recordCPEClone(i, CPE.CPI);
464
5.30k
    }
465
4.56k
  }
466
17.0k
467
17.0k
  DEBUG(dbgs() << '\n'; dumpBBs());
468
17.0k
469
17.0k
  BBInfo.clear();
470
17.0k
  WaterList.clear();
471
17.0k
  CPUsers.clear();
472
17.0k
  CPEntries.clear();
473
17.0k
  JumpTableEntryIndices.clear();
474
17.0k
  JumpTableUserIndices.clear();
475
17.0k
  ImmBranches.clear();
476
17.0k
  PushPopMIs.clear();
477
17.0k
  T2JumpTables.clear();
478
17.0k
479
17.0k
  return MadeChange;
480
17.0k
}
481
482
/// \brief Perform the initial placement of the regular constant pool entries.
483
/// To start with, we put them all at the end of the function.
484
void
485
1.81k
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
486
1.81k
  // Create the basic block to hold the CPE's.
487
1.81k
  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
488
1.81k
  MF->push_back(BB);
489
1.81k
490
1.81k
  // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
491
1.81k
  unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
492
1.81k
493
1.81k
  // Mark the basic block as required by the const-pool.
494
1.81k
  BB->setAlignment(MaxAlign);
495
1.81k
496
1.81k
  // The function needs to be as aligned as the basic blocks. The linker may
497
1.81k
  // move functions around based on their alignment.
498
1.81k
  MF->ensureAlignment(BB->getAlignment());
499
1.81k
500
1.81k
  // Order the entries in BB by descending alignment.  That ensures correct
501
1.81k
  // alignment of all entries as long as BB is sufficiently aligned.  Keep
502
1.81k
  // track of the insertion point for each alignment.  We are going to bucket
503
1.81k
  // sort the entries as they are created.
504
1.81k
  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
505
1.81k
506
1.81k
  // Add all of the constants from the constant pool to the end block, use an
507
1.81k
  // identity mapping of CPI's to CPE's.
508
1.81k
  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
509
1.81k
510
1.81k
  const DataLayout &TD = MF->getDataLayout();
511
5.98k
  for (unsigned i = 0, e = CPs.size(); 
i != e5.98k
;
++i4.16k
) {
512
4.16k
    unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
513
4.16k
    assert(Size >= 4 && "Too small constant pool entry");
514
4.16k
    unsigned Align = CPs[i].getAlignment();
515
4.16k
    assert(isPowerOf2_32(Align) && "Invalid alignment");
516
4.16k
    // Verify that all constant pool entries are a multiple of their alignment.
517
4.16k
    // If not, we would have to pad them out so that instructions stay aligned.
518
4.16k
    assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
519
4.16k
520
4.16k
    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
521
4.16k
    unsigned LogAlign = Log2_32(Align);
522
4.16k
    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
523
4.16k
    MachineInstr *CPEMI =
524
4.16k
      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
525
4.16k
        .addImm(i).addConstantPoolIndex(i).addImm(Size);
526
4.16k
    CPEMIs.push_back(CPEMI);
527
4.16k
528
4.16k
    // Ensure that future entries with higher alignment get inserted before
529
4.16k
    // CPEMI. This is bucket sort with iterators.
530
4.86k
    for (unsigned a = LogAlign + 1; 
a <= MaxAlign4.86k
;
++a702
)
531
702
      
if (702
InsPoint[a] == InsAt702
)
532
50
        InsPoint[a] = CPEMI;
533
4.16k
534
4.16k
    // Add a new CPEntry, but no corresponding CPUser yet.
535
4.16k
    CPEntries.emplace_back(1, CPEntry(CPEMI, i));
536
4.16k
    ++NumCPEs;
537
4.16k
    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
538
4.16k
                 << Size << ", align = " << Align <<'\n');
539
4.16k
  }
540
1.81k
  DEBUG(BB->dump());
541
1.81k
}
542
543
/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
544
/// instructions can be made more efficient if the jump table immediately
545
/// follows the instruction, it's best to place them immediately next to their
546
/// jumps to begin with. In almost all cases they'll never be moved from that
547
/// position.
548
void ARMConstantIslands::doInitialJumpTablePlacement(
549
351
    std::vector<MachineInstr *> &CPEMIs) {
550
351
  unsigned i = CPEntries.size();
551
351
  auto MJTI = MF->getJumpTableInfo();
552
351
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
553
351
554
351
  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
555
7.23k
  for (MachineBasicBlock &MBB : *MF) {
556
7.23k
    auto MI = MBB.getLastNonDebugInstr();
557
7.23k
    if (MI == MBB.end())
558
65
      continue;
559
7.17k
560
7.17k
    unsigned JTOpcode;
561
7.17k
    switch (MI->getOpcode()) {
562
6.77k
    default:
563
6.77k
      continue;
564
49
    case ARM::BR_JTadd:
565
49
    case ARM::BR_JTr:
566
49
    case ARM::tBR_JTr:
567
49
    case ARM::BR_JTm:
568
49
      JTOpcode = ARM::JUMPTABLE_ADDRS;
569
49
      break;
570
344
    case ARM::t2BR_JT:
571
344
      JTOpcode = ARM::JUMPTABLE_INSTS;
572
344
      break;
573
0
    case ARM::tTBB_JT:
574
0
    case ARM::t2TBB_JT:
575
0
      JTOpcode = ARM::JUMPTABLE_TBB;
576
0
      break;
577
0
    case ARM::tTBH_JT:
578
0
    case ARM::t2TBH_JT:
579
0
      JTOpcode = ARM::JUMPTABLE_TBH;
580
0
      break;
581
393
    }
582
393
583
393
    unsigned NumOps = MI->getDesc().getNumOperands();
584
393
    MachineOperand JTOp =
585
393
      MI->getOperand(NumOps - (MI->isPredicable() ? 
20
:
1393
));
586
393
    unsigned JTI = JTOp.getIndex();
587
393
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
588
393
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
589
393
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
590
393
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
591
393
                                  DebugLoc(), TII->get(JTOpcode))
592
393
                              .addImm(i++)
593
393
                              .addJumpTableIndex(JTI)
594
393
                              .addImm(Size);
595
393
    CPEMIs.push_back(CPEMI);
596
393
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
597
393
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
598
393
    if (!LastCorrectlyNumberedBB)
599
351
      LastCorrectlyNumberedBB = &MBB;
600
7.23k
  }
601
351
602
351
  // If we did anything then we need to renumber the subsequent blocks.
603
351
  
if (351
LastCorrectlyNumberedBB351
)
604
351
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
605
351
}
606
607
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
608
/// into the block immediately after it.
609
57.7k
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
610
57.7k
  // Get the next machine basic block in the function.
611
57.7k
  MachineFunction::iterator MBBI = MBB->getIterator();
612
57.7k
  // Can't fall off end of function.
613
57.7k
  if (std::next(MBBI) == MBB->getParent()->end())
614
17.0k
    return false;
615
40.6k
616
40.6k
  MachineBasicBlock *NextBB = &*std::next(MBBI);
617
40.6k
  if (!MBB->isSuccessor(NextBB))
618
12.7k
    return false;
619
27.9k
620
27.9k
  // Try to analyze the end of the block. A potential fallthrough may already
621
27.9k
  // have an unconditional branch for whatever reason.
622
27.9k
  MachineBasicBlock *TBB, *FBB;
623
27.9k
  SmallVector<MachineOperand, 4> Cond;
624
27.9k
  bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
625
27.7k
  return TooDifficult || FBB == nullptr;
626
57.7k
}
627
628
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
629
/// look up the corresponding CPEntry.
630
ARMConstantIslands::CPEntry *
631
ARMConstantIslands::findConstPoolEntry(unsigned CPI,
632
6.41k
                                       const MachineInstr *CPEMI) {
633
6.41k
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
634
6.41k
  // Number of entries per constpool index should be small, just do a
635
6.41k
  // linear search.
636
6.42k
  for (unsigned i = 0, e = CPEs.size(); 
i != e6.42k
;
++i8
) {
637
6.42k
    if (CPEs[i].CPEMI == CPEMI)
638
6.41k
      return &CPEs[i];
639
6.42k
  }
640
0
  return nullptr;
641
6.41k
}
642
643
/// getCPELogAlign - Returns the required alignment of the constant pool entry
644
/// represented by CPEMI.  Alignment is measured in log2(bytes) units.
645
14.6k
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
646
14.6k
  switch (CPEMI->getOpcode()) {
647
14.6k
  case ARM::CONSTPOOL_ENTRY:
648
14.6k
    break;
649
0
  case ARM::JUMPTABLE_TBB:
650
0
    return isThumb1 ? 
20
:
00
;
651
0
  case ARM::JUMPTABLE_TBH:
652
0
    return isThumb1 ? 
20
:
10
;
653
0
  case ARM::JUMPTABLE_INSTS:
654
0
    return 1;
655
7
  case ARM::JUMPTABLE_ADDRS:
656
7
    return 2;
657
0
  default:
658
0
    llvm_unreachable("unknown constpool entry kind");
659
14.6k
  }
660
14.6k
661
14.6k
  unsigned CPI = getCombinedIndex(CPEMI);
662
14.6k
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
663
14.6k
  unsigned Align = MCP->getConstants()[CPI].getAlignment();
664
14.6k
  assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
665
14.6k
  return Log2_32(Align);
666
14.6k
}
667
668
/// scanFunctionJumpTables - Do a scan of the function, building up
669
/// information about the sizes of each block and the locations of all
670
/// the jump tables.
671
9.73k
void ARMConstantIslands::scanFunctionJumpTables() {
672
46.2k
  for (MachineBasicBlock &MBB : *MF) {
673
46.2k
    for (MachineInstr &I : MBB)
674
342k
      
if (342k
I.isBranch() &&
675
26.8k
          
(I.getOpcode() == ARM::t2BR_JT || 26.8k
I.getOpcode() == ARM::tBR_JTr26.4k
))
676
366
        T2JumpTables.push_back(&I);
677
46.2k
  }
678
9.73k
}
679
680
/// initializeFunctionInfo - Do the initial scan of the function, building up
681
/// information about the sizes of each block, the location of all the water,
682
/// and finding all of the constant pool users.
683
void ARMConstantIslands::
684
17.0k
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
685
17.0k
686
17.0k
  BBInfo = computeAllBlockSizes(MF);
687
17.0k
688
17.0k
  // The known bits of the entry block offset are determined by the function
689
17.0k
  // alignment.
690
17.0k
  BBInfo.front().KnownBits = MF->getAlignment();
691
17.0k
692
17.0k
  // Compute block offsets and known bits.
693
17.0k
  adjustBBOffsetsAfter(&MF->front());
694
17.0k
695
17.0k
  // Now go back through the instructions and build up our data structures.
696
57.5k
  for (MachineBasicBlock &MBB : *MF) {
697
57.5k
    // If this block doesn't fall through into the next MBB, then this is
698
57.5k
    // 'water' that a constant pool island could be placed.
699
57.5k
    if (!BBHasFallthrough(&MBB))
700
29.8k
      WaterList.push_back(&MBB);
701
57.5k
702
425k
    for (MachineInstr &I : MBB) {
703
425k
      if (I.isDebugValue())
704
150
        continue;
705
424k
706
424k
      unsigned Opc = I.getOpcode();
707
424k
      if (
I.isBranch()424k
) {
708
27.7k
        bool isCond = false;
709
27.7k
        unsigned Bits = 0;
710
27.7k
        unsigned Scale = 1;
711
27.7k
        int UOpc = Opc;
712
27.7k
        switch (Opc) {
713
183
        default:
714
183
          continue;  // Ignore other JT branches
715
368
        case ARM::t2BR_JT:
716
368
        case ARM::tBR_JTr:
717
368
          T2JumpTables.push_back(&I);
718
368
          continue;   // Does not get an entry in ImmBranches
719
678
        case ARM::Bcc:
720
678
          isCond = true;
721
678
          UOpc = ARM::B;
722
678
          LLVM_FALLTHROUGH;
723
889
        case ARM::B:
724
889
          Bits = 24;
725
889
          Scale = 4;
726
889
          break;
727
1.68k
        case ARM::tBcc:
728
1.68k
          isCond = true;
729
1.68k
          UOpc = ARM::tB;
730
1.68k
          Bits = 8;
731
1.68k
          Scale = 2;
732
1.68k
          break;
733
508
        case ARM::tB:
734
508
          Bits = 11;
735
508
          Scale = 2;
736
508
          break;
737
19.1k
        case ARM::t2Bcc:
738
19.1k
          isCond = true;
739
19.1k
          UOpc = ARM::t2B;
740
19.1k
          Bits = 20;
741
19.1k
          Scale = 2;
742
19.1k
          break;
743
4.97k
        case ARM::t2B:
744
4.97k
          Bits = 24;
745
4.97k
          Scale = 2;
746
4.97k
          break;
747
27.2k
        }
748
27.2k
749
27.2k
        // Record this immediate branch.
750
27.2k
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
751
27.2k
        ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc));
752
27.2k
      }
753
424k
754
424k
      
if (424k
Opc == ARM::tPUSH || 424k
Opc == ARM::tPOP_RET419k
)
755
10.0k
        PushPopMIs.push_back(&I);
756
424k
757
424k
      if (
Opc == ARM::CONSTPOOL_ENTRY || 424k
Opc == ARM::JUMPTABLE_ADDRS420k
||
758
424k
          
Opc == ARM::JUMPTABLE_INSTS420k
||
Opc == ARM::JUMPTABLE_TBB419k
||
759
419k
          Opc == ARM::JUMPTABLE_TBH)
760
4.56k
        continue;
761
419k
762
419k
      // Scan the instructions for constant pool operands.
763
2.37M
      
for (unsigned op = 0, e = I.getNumOperands(); 419k
op != e2.37M
;
++op1.95M
)
764
1.96M
        
if (1.96M
I.getOperand(op).isCPI() || 1.96M
I.getOperand(op).isJTI()1.95M
) {
765
5.51k
          // We found one.  The addressing mode tells us the max displacement
766
5.51k
          // from the PC that this instruction permits.
767
5.51k
768
5.51k
          // Basic size info comes from the TSFlags field.
769
5.51k
          unsigned Bits = 0;
770
5.51k
          unsigned Scale = 1;
771
5.51k
          bool NegOk = false;
772
5.51k
          bool IsSoImm = false;
773
5.51k
774
5.51k
          switch (Opc) {
775
0
          default:
776
0
            llvm_unreachable("Unknown addressing mode for CP reference!");
777
5.51k
778
5.51k
          // Taking the address of a CP entry.
779
97
          case ARM::LEApcrel:
780
97
          case ARM::LEApcrelJT:
781
97
            // This takes a SoImm, which is 8 bit immediate rotated. We'll
782
97
            // pretend the maximum offset is 255 * 4. Since each instruction
783
97
            // 4 byte wide, this is always correct. We'll check for other
784
97
            // displacements that fits in a SoImm as well.
785
97
            Bits = 8;
786
97
            Scale = 4;
787
97
            NegOk = true;
788
97
            IsSoImm = true;
789
97
            break;
790
528
          case ARM::t2LEApcrel:
791
528
          case ARM::t2LEApcrelJT:
792
528
            Bits = 12;
793
528
            NegOk = true;
794
528
            break;
795
61
          case ARM::tLEApcrel:
796
61
          case ARM::tLEApcrelJT:
797
61
            Bits = 8;
798
61
            Scale = 4;
799
61
            break;
800
61
801
701
          case ARM::LDRBi12:
802
701
          case ARM::LDRi12:
803
701
          case ARM::LDRcp:
804
701
          case ARM::t2LDRpci:
805
701
          case ARM::t2LDRHpci:
806
701
          case ARM::t2LDRBpci:
807
701
            Bits = 12;  // +-offset_12
808
701
            NegOk = true;
809
701
            break;
810
701
811
2.94k
          case ARM::tLDRpci:
812
2.94k
            Bits = 8;
813
2.94k
            Scale = 4;  // +(offset_8*4)
814
2.94k
            break;
815
701
816
1.18k
          case ARM::VLDRD:
817
1.18k
          case ARM::VLDRS:
818
1.18k
            Bits = 8;
819
1.18k
            Scale = 4;  // +-(offset_8*4)
820
1.18k
            NegOk = true;
821
1.18k
            break;
822
1.18k
823
0
          case ARM::tLDRHi:
824
0
            Bits = 5;
825
0
            Scale = 2; // +(offset_5*2)
826
0
            break;
827
5.51k
          }
828
5.51k
829
5.51k
          // Remember that this is a user of a CP entry.
830
5.51k
          unsigned CPI = I.getOperand(op).getIndex();
831
5.51k
          if (
I.getOperand(op).isJTI()5.51k
) {
832
393
            JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
833
393
            CPI = JumpTableEntryIndices[CPI];
834
393
          }
835
1.96M
836
1.96M
          MachineInstr *CPEMI = CPEMIs[CPI];
837
1.96M
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
838
1.96M
          CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm));
839
1.96M
840
1.96M
          // Increment corresponding CPEntry reference count.
841
1.96M
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
842
1.96M
          assert(CPE && "Cannot find a corresponding CPEntry!");
843
1.96M
          CPE->RefCount++;
844
1.96M
845
1.96M
          // Instructions can only use one CP entry, don't bother scanning the
846
1.96M
          // rest of the operands.
847
1.96M
          break;
848
1.96M
        }
849
425k
    }
850
57.5k
  }
851
17.0k
}
852
853
/// getOffsetOf - Return the current offset of the specified machine instruction
854
/// from the start of the function.  This offset changes as stuff is moved
855
/// around inside the function.
856
81.6k
unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
857
81.6k
  MachineBasicBlock *MBB = MI->getParent();
858
81.6k
859
81.6k
  // The offset is composed of two things: the sum of the sizes of all MBB's
860
81.6k
  // before this instruction's block, and the offset from the start of the block
861
81.6k
  // it is in.
862
81.6k
  unsigned Offset = BBInfo[MBB->getNumber()].Offset;
863
81.6k
864
81.6k
  // Sum instructions before MI in MBB.
865
1.24M
  for (MachineBasicBlock::iterator I = MBB->begin(); 
&*I != MI1.24M
;
++I1.16M
) {
866
1.16M
    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
867
1.16M
    Offset += TII->getInstSizeInBytes(*I);
868
1.16M
  }
869
81.6k
  return Offset;
870
81.6k
}
871
872
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
873
/// ID.
874
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
875
2.64k
                              const MachineBasicBlock *RHS) {
876
2.64k
  return LHS->getNumber() < RHS->getNumber();
877
2.64k
}
878
879
/// updateForInsertedWaterBlock - When a block is newly inserted into the
880
/// machine function, it upsets all of the block numbers.  Renumber the blocks
881
/// and update the arrays that parallel this numbering.
882
744
void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
883
744
  // Renumber the MBB's to keep them consecutive.
884
744
  NewBB->getParent()->RenumberBlocks(NewBB);
885
744
886
744
  // Insert an entry into BBInfo to align it properly with the (newly
887
744
  // renumbered) block numbers.
888
744
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
889
744
890
744
  // Next, update WaterList.  Specifically, we need to add NewMBB as having
891
744
  // available water after it.
892
744
  water_iterator IP =
893
744
    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
894
744
                     CompareMBBNumbers);
895
744
  WaterList.insert(IP, NewBB);
896
744
}
897
898
/// Split the basic block containing MI into two blocks, which are joined by
899
/// an unconditional branch.  Update data structures and renumber blocks to
900
/// account for this change and returns the newly created block.
901
21
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
902
21
  MachineBasicBlock *OrigBB = MI->getParent();
903
21
904
21
  // Create a new MBB for the code after the OrigBB.
905
21
  MachineBasicBlock *NewBB =
906
21
    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
907
21
  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
908
21
  MF->insert(MBBI, NewBB);
909
21
910
21
  // Splice the instructions starting with MI over to NewBB.
911
21
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
912
21
913
21
  // Add an unconditional branch from OrigBB to NewBB.
914
21
  // Note the new unconditional branch is not being recorded.
915
21
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
916
21
  // correspond to anything in the source.
917
21
  unsigned Opc = isThumb ? 
(isThumb2 ? 18
ARM::t2B18
:
ARM::tB0
) :
ARM::B3
;
918
21
  if (!isThumb)
919
3
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
920
21
  else
921
18
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
922
18
        .addMBB(NewBB)
923
18
        .add(predOps(ARMCC::AL));
924
21
  ++NumSplit;
925
21
926
21
  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
927
21
  NewBB->transferSuccessors(OrigBB);
928
21
929
21
  // OrigBB branches to NewBB.
930
21
  OrigBB->addSuccessor(NewBB);
931
21
932
21
  // Update internal data structures to account for the newly inserted MBB.
933
21
  // This is almost the same as updateForInsertedWaterBlock, except that
934
21
  // the Water goes after OrigBB, not NewBB.
935
21
  MF->RenumberBlocks(NewBB);
936
21
937
21
  // Insert an entry into BBInfo to align it properly with the (newly
938
21
  // renumbered) block numbers.
939
21
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
940
21
941
21
  // Next, update WaterList.  Specifically, we need to add OrigMBB as having
942
21
  // available water after it (but not if it's already there, which happens
943
21
  // when splitting before a conditional branch that is followed by an
944
21
  // unconditional branch - in that case we want to insert NewBB).
945
21
  water_iterator IP =
946
21
    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
947
21
                     CompareMBBNumbers);
948
21
  MachineBasicBlock* WaterBB = *IP;
949
21
  if (WaterBB == OrigBB)
950
15
    WaterList.insert(std::next(IP), NewBB);
951
21
  else
952
6
    WaterList.insert(IP, OrigBB);
953
21
  NewWaterList.insert(OrigBB);
954
21
955
21
  // Figure out how large the OrigBB is.  As the first half of the original
956
21
  // block, it cannot contain a tablejump.  The size includes
957
21
  // the new jump we added.  (It should be possible to do this without
958
21
  // recounting everything, but it's very confusing, and this is rarely
959
21
  // executed.)
960
21
  computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]);
961
21
962
21
  // Figure out how large the NewMBB is.  As the second half of the original
963
21
  // block, it may contain a tablejump.
964
21
  computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]);
965
21
966
21
  // All BBOffsets following these blocks must be modified.
967
21
  adjustBBOffsetsAfter(OrigBB);
968
21
969
21
  return NewBB;
970
21
}
971
972
/// getUserOffset - Compute the offset of U.MI as seen by the hardware
973
/// displacement computation.  Update U.KnownAlignment to match its current
974
/// basic block location.
975
7.84k
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
976
7.84k
  unsigned UserOffset = getOffsetOf(U.MI);
977
7.84k
  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
978
7.84k
  unsigned KnownBits = BBI.internalKnownBits();
979
7.84k
980
7.84k
  // The value read from PC is offset from the actual instruction address.
981
7.84k
  UserOffset += (isThumb ? 
46.83k
:
81.01k
);
982
7.84k
983
7.84k
  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
984
7.84k
  // Make sure U.getMaxDisp() returns a constrained range.
985
7.84k
  U.KnownAlignment = (KnownBits >= 2);
986
7.84k
987
7.84k
  // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
988
7.84k
  // purposes of the displacement computation; compensate for that here.
989
7.84k
  // For unknown alignments, getMaxDisp() constrains the range instead.
990
7.84k
  if (
isThumb && 7.84k
U.KnownAlignment6.83k
)
991
419
    UserOffset &= ~3u;
992
7.84k
993
7.84k
  return UserOffset;
994
7.84k
}
995
996
/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
997
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
998
/// constant pool entry).
999
/// UserOffset is computed by getUserOffset above to include PC adjustments. If
1000
/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
1001
/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
1002
bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
1003
                                         unsigned TrialOffset, unsigned MaxDisp,
1004
20.9k
                                         bool NegativeOK, bool IsSoImm) {
1005
20.9k
  if (
UserOffset <= TrialOffset20.9k
) {
1006
16.2k
    // User before the Trial.
1007
16.2k
    if (TrialOffset - UserOffset <= MaxDisp)
1008
9.88k
      return true;
1009
20.9k
    // FIXME: Make use full range of soimm values.
1010
4.66k
  } else 
if (4.66k
NegativeOK4.66k
) {
1011
3.30k
    if (UserOffset - TrialOffset <= MaxDisp)
1012
932
      return true;
1013
10.1k
    // FIXME: Make use full range of soimm values.
1014
10.1k
  }
1015
10.1k
  return false;
1016
10.1k
}
1017
1018
/// isWaterInRange - Returns true if a CPE placed after the specified
1019
/// Water (a basic block) will be in range for the specific MI.
1020
///
1021
/// Compute how much the function will grow by inserting a CPE after Water.
1022
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
1023
                                        MachineBasicBlock* Water, CPUser &U,
1024
12.4k
                                        unsigned &Growth) {
1025
12.4k
  unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
1026
12.4k
  unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
1027
12.4k
  unsigned NextBlockOffset, NextBlockAlignment;
1028
12.4k
  MachineFunction::const_iterator NextBlock = Water->getIterator();
1029
12.4k
  if (
++NextBlock == MF->end()12.4k
) {
1030
744
    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
1031
744
    NextBlockAlignment = 0;
1032
12.4k
  } else {
1033
11.6k
    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
1034
11.6k
    NextBlockAlignment = NextBlock->getAlignment();
1035
11.6k
  }
1036
12.4k
  unsigned Size = U.CPEMI->getOperand(2).getImm();
1037
12.4k
  unsigned CPEEnd = CPEOffset + Size;
1038
12.4k
1039
12.4k
  // The CPE may be able to hide in the alignment padding before the next
1040
12.4k
  // block. It may also cause more padding to be required if it is more aligned
1041
12.4k
  // that the next block.
1042
12.4k
  if (
CPEEnd > NextBlockOffset12.4k
) {
1043
12.1k
    Growth = CPEEnd - NextBlockOffset;
1044
12.1k
    // Compute the padding that would go at the end of the CPE to align the next
1045
12.1k
    // block.
1046
12.1k
    Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
1047
12.1k
1048
12.1k
    // If the CPE is to be inserted before the instruction, that will raise
1049
12.1k
    // the offset of the instruction. Also account for unknown alignment padding
1050
12.1k
    // in blocks between CPE and the user.
1051
12.1k
    if (CPEOffset < UserOffset)
1052
4.03k
      UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
1053
12.1k
  } else
1054
12.4k
    // CPE fits in existing padding.
1055
309
    Growth = 0;
1056
12.4k
1057
12.4k
  return isOffsetInRange(UserOffset, CPEOffset, U);
1058
12.4k
}
1059
1060
/// isCPEntryInRange - Returns true if the distance between specific MI and
1061
/// specific ConstPool entry instruction can fit in MI's displacement field.
1062
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
1063
                                      MachineInstr *CPEMI, unsigned MaxDisp,
1064
8.06k
                                      bool NegOk, bool DoDump) {
1065
8.06k
  unsigned CPEOffset  = getOffsetOf(CPEMI);
1066
8.06k
1067
8.06k
  if (
DoDump8.06k
) {
1068
7.84k
    DEBUG({
1069
7.84k
      unsigned Block = MI->getParent()->getNumber();
1070
7.84k
      const BasicBlockInfo &BBI = BBInfo[Block];
1071
7.84k
      dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
1072
7.84k
             << " max delta=" << MaxDisp
1073
7.84k
             << format(" insn address=%#x", UserOffset)
1074
7.84k
             << " in BB#" << Block << ": "
1075
7.84k
             << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
1076
7.84k
             << format("CPE address=%#x offset=%+d: ", CPEOffset,
1077
7.84k
                       int(CPEOffset-UserOffset));
1078
7.84k
    });
1079
7.84k
  }
1080
8.06k
1081
8.06k
  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
1082
8.06k
}
1083
1084
#ifndef NDEBUG
1085
/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
1086
/// unconditionally branches to its only successor.
1087
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
1088
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
1089
    return false;
1090
1091
  MachineBasicBlock *Succ = *MBB->succ_begin();
1092
  MachineBasicBlock *Pred = *MBB->pred_begin();
1093
  MachineInstr *PredMI = &Pred->back();
1094
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
1095
      || PredMI->getOpcode() == ARM::t2B)
1096
    return PredMI->getOperand(0).getMBB() == Succ;
1097
  return false;
1098
}
1099
#endif // NDEBUG
1100
1101
44.3k
void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
1102
44.3k
  unsigned BBNum = BB->getNumber();
1103
473k
  for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); 
i < e473k
;
++i429k
) {
1104
429k
    // Get the offset and known bits at the end of the layout predecessor.
1105
429k
    // Include the alignment of the current block.
1106
429k
    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
1107
429k
    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
1108
429k
    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
1109
429k
1110
429k
    // This is where block i begins.  Stop if the offset is already correct,
1111
429k
    // and we have updated 2 blocks.  This is the maximum number of blocks
1112
429k
    // changed before calling this function.
1113
429k
    if (i > BBNum + 2 &&
1114
368k
        BBInfo[i].Offset == Offset &&
1115
0
        BBInfo[i].KnownBits == KnownBits)
1116
0
      break;
1117
429k
1118
429k
    BBInfo[i].Offset = Offset;
1119
429k
    BBInfo[i].KnownBits = KnownBits;
1120
429k
  }
1121
44.3k
}
1122
1123
/// decrementCPEReferenceCount - find the constant pool entry with index CPI
1124
/// and instruction CPEMI, and decrement its refcount.  If the refcount
1125
/// becomes 0 remove the entry and instruction.  Returns true if we removed
1126
/// the entry, false if we didn't.
1127
bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
1128
906
                                                    MachineInstr *CPEMI) {
1129
906
  // Find the old entry. Eliminate it if it is no longer used.
1130
906
  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
1131
906
  assert(CPE && "Unexpected!");
1132
906
  if (
--CPE->RefCount == 0906
) {
1133
705
    removeDeadCPEMI(CPEMI);
1134
705
    CPE->CPEMI = nullptr;
1135
705
    --NumCPEs;
1136
705
    return true;
1137
705
  }
1138
201
  return false;
1139
201
}
1140
1141
23.0k
unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
1142
23.0k
  if (CPEMI->getOperand(1).isCPI())
1143
22.6k
    return CPEMI->getOperand(1).getIndex();
1144
413
1145
413
  return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
1146
413
}
1147
1148
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
1149
/// if not, see if an in-range clone of the CPE is in range, and if so,
1150
/// change the data structures so the user references the clone.  Returns:
1151
/// 0 = no existing entry found
1152
/// 1 = entry found, and there were no code insertions or deletions
1153
/// 2 = entry found, and there were code insertions or deletions
1154
7.54k
int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) {
1155
7.54k
  MachineInstr *UserMI = U.MI;
1156
7.54k
  MachineInstr *CPEMI  = U.CPEMI;
1157
7.54k
1158
7.54k
  // Check to see if the CPE is already in-range.
1159
7.54k
  if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
1160
7.54k
                       true)) {
1161
6.64k
    DEBUG(dbgs() << "In range\n");
1162
6.64k
    return 1;
1163
6.64k
  }
1164
906
1165
906
  // No.  Look for previously created clones of the CPE that are in range.
1166
906
  unsigned CPI = getCombinedIndex(CPEMI);
1167
906
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
1168
1.86k
  for (unsigned i = 0, e = CPEs.size(); 
i != e1.86k
;
++i961
) {
1169
1.12k
    // We already tried this one
1170
1.12k
    if (CPEs[i].CPEMI == CPEMI)
1171
906
      continue;
1172
217
    // Removing CPEs can leave empty entries, skip
1173
217
    
if (217
CPEs[i].CPEMI == nullptr217
)
1174
4
      continue;
1175
213
    
if (213
isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
1176
213
                     U.NegOk)) {
1177
162
      DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
1178
162
                   << CPEs[i].CPI << "\n");
1179
162
      // Point the CPUser node to the replacement
1180
162
      U.CPEMI = CPEs[i].CPEMI;
1181
162
      // Change the CPI in the instruction operand to refer to the clone.
1182
324
      for (unsigned j = 0, e = UserMI->getNumOperands(); 
j != e324
;
++j162
)
1183
324
        
if (324
UserMI->getOperand(j).isCPI()324
) {
1184
162
          UserMI->getOperand(j).setIndex(CPEs[i].CPI);
1185
162
          break;
1186
162
        }
1187
162
      // Adjust the refcount of the clone...
1188
162
      CPEs[i].RefCount++;
1189
162
      // ...and the original.  If we didn't remove the old entry, none of the
1190
162
      // addresses changed, so we don't need another pass.
1191
162
      return decrementCPEReferenceCount(CPI, CPEMI) ? 
218
:
1144
;
1192
162
    }
1193
1.12k
  }
1194
744
  return 0;
1195
7.54k
}
1196
1197
/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
1198
/// the specific unconditional branch instruction.
1199
107
static inline unsigned getUnconditionalBrDisp(int Opc) {
1200
107
  switch (Opc) {
1201
100
  case ARM::tB:
1202
100
    return ((1<<10)-1)*2;
1203
4
  case ARM::t2B:
1204
4
    return ((1<<23)-1)*2;
1205
3
  default:
1206
3
    break;
1207
3
  }
1208
3
1209
3
  return ((1<<23)-1)*4;
1210
3
}
1211
1212
/// findAvailableWater - Look for an existing entry in the WaterList in which
1213
/// we can place the CPE referenced from U so it's within range of U's MI.
1214
/// Returns true if found, false if not.  If it returns true, WaterIter
1215
/// is set to the WaterList entry.  For Thumb, prefer water that will not
1216
/// introduce padding to water that will.  To ensure that this pass
1217
/// terminates, the CPE location for a particular CPUser is only allowed to
1218
/// move to a lower address, so search backward from the end of the list and
1219
/// prefer the first water that is in range.
1220
bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
1221
                                            water_iterator &WaterIter,
1222
744
                                            bool CloserWater) {
1223
744
  if (WaterList.empty())
1224
0
    return false;
1225
744
1226
744
  unsigned BestGrowth = ~0u;
1227
744
  // The nearest water without splitting the UserBB is right after it.
1228
744
  // If the distance is still large (we have a big BB), then we need to split it
1229
744
  // if we don't converge after certain iterations. This helps the following
1230
744
  // situation to converge:
1231
744
  //   BB0:
1232
744
  //      Big BB
1233
744
  //   BB1:
1234
744
  //      Constant Pool
1235
744
  // When a CP access is out of range, BB0 may be used as water. However,
1236
744
  // inserting islands between BB0 and BB1 makes other accesses out of range.
1237
744
  MachineBasicBlock *UserBB = U.MI->getParent();
1238
744
  unsigned MinNoSplitDisp =
1239
744
      BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
1240
744
  if (
CloserWater && 744
MinNoSplitDisp > U.getMaxDisp() / 20
)
1241
0
    return false;
1242
744
  for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
1243
11.6k
       
--IP11.6k
) {
1244
12.4k
    MachineBasicBlock* WaterBB = *IP;
1245
12.4k
    // Check if water is in range and is either at a lower address than the
1246
12.4k
    // current "high water mark" or a new water block that was created since
1247
12.4k
    // the previous iteration by inserting an unconditional branch.  In the
1248
12.4k
    // latter case, we want to allow resetting the high water mark back to
1249
12.4k
    // this new water since we haven't seen it before.  Inserting branches
1250
12.4k
    // should be relatively uncommon and when it does happen, we want to be
1251
12.4k
    // sure to take advantage of it for all the CPEs near that block, so that
1252
12.4k
    // we don't insert more branches than necessary.
1253
12.4k
    // When CloserWater is true, we try to find the lowest address after (or
1254
12.4k
    // equal to) user MI's BB no matter of padding growth.
1255
12.4k
    unsigned Growth;
1256
12.4k
    if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
1257
3.39k
        (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
1258
3.39k
         
NewWaterList.count(WaterBB)6
||
WaterBB == U.MI->getParent()4
) &&
1259
12.4k
        
Growth < BestGrowth3.38k
) {
1260
802
      // This is the least amount of required padding seen so far.
1261
802
      BestGrowth = Growth;
1262
802
      WaterIter = IP;
1263
802
      DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
1264
802
                   << " Growth=" << Growth << '\n');
1265
802
1266
802
      if (
CloserWater && 802
WaterBB == U.MI->getParent()0
)
1267
0
        return true;
1268
802
      // Keep looking unless it is perfect and we're not looking for the lowest
1269
802
      // possible address.
1270
802
      
if (802
!CloserWater && 802
BestGrowth == 0802
)
1271
8
        return true;
1272
12.4k
    }
1273
12.4k
    
if (12.4k
IP == B12.4k
)
1274
736
      break;
1275
12.4k
  }
1276
736
  return BestGrowth != ~0u;
1277
744
}
1278
1279
/// createNewWater - No existing WaterList entry will work for
1280
/// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
1281
/// block is used if in range, and the conditional branch munged so control
1282
/// flow is correct.  Otherwise the block is split to create a hole with an
1283
/// unconditional branch around it.  In either case NewMBB is set to a
1284
/// block following which the new island can be inserted (the WaterList
1285
/// is not adjusted).
1286
void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
                                        unsigned UserOffset,
                                        MachineBasicBlock *&NewMBB) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;
  unsigned CPELogAlign = getCPELogAlign(CPEMI);
  MachineBasicBlock *UserMBB = UserMI->getParent();
  const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];

  // If the block does not end in an unconditional branch already, and if the
  // end of the block is within range, make new water there.  (The addition
  // below is for the unconditional branch we will be adding: 4 bytes on ARM +
  // Thumb2, 2 on Thumb1.)
  if (BBHasFallthrough(UserMBB)) {
    // Size of branch to insert.
    unsigned Delta = isThumb1 ? 2 : 4;
    // Compute the offset where the CPE will begin.
    unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;

    if (isOffsetInRange(UserOffset, CPEOffset, U)) {
      DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
            << format(", expected CPE offset %#x\n", CPEOffset));
      // The island goes between UserMBB and its layout successor.
      NewMBB = &*++UserMBB->getIterator();
      // Add an unconditional branch from UserMBB to fallthrough block.  Record
      // it for branch lengthening; this new branch will not get out of range,
      // but if the preceding conditional branch is out of range, the targets
      // will be exchanged, and the altered branch may be out of range, so the
      // machinery has to know about it.
      int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
      if (!isThumb)
        BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
      else
        // Thumb branches carry an explicit AL predicate operand.
        BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
            .addMBB(NewMBB)
            .add(predOps(ARMCC::AL));
      unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
      ImmBranches.push_back(ImmBranch(&UserMBB->back(),
                                      MaxDisp, false, UncondBr));
      // UserMBB grew by one branch; recompute its size and all later offsets.
      computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]);
      adjustBBOffsetsAfter(UserMBB);
      return;
    }
  }

  // What a big block.  Find a place within the block to split it.  This is a
  // little tricky on Thumb1 since instructions are 2 bytes and constant pool
  // entries are 4 bytes: if instruction I references island CPE, and
  // instruction I+1 references CPE', it will not work well to put CPE as far
  // forward as possible, since then CPE' cannot immediately follow it (that
  // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
  // need to create a new island.  So, we make a first guess, then walk through
  // the instructions between the one currently being looked at and the
  // possible insertion point, and make sure any other instructions that
  // reference CPEs will be able to use the same island area; if not, we back
  // up the insertion point.

  // Try to split the block so it's fully aligned.  Compute the latest split
  // point where we can add a 4-byte branch instruction, and then align to
  // LogAlign which is the largest possible alignment in the function.
  unsigned LogAlign = MF->getAlignment();
  assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
  unsigned KnownBits = UserBBI.internalKnownBits();
  unsigned UPad = UnknownPadding(LogAlign, KnownBits);
  unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
  DEBUG(dbgs() << format("Split in middle of big block before %#x",
                         BaseInsertOffset));

  // The 4 in the following is for the unconditional branch we'll be inserting
  // (allows for long branch on Thumb1).  Alignment of the island is handled
  // inside isOffsetInRange.
  BaseInsertOffset -= 4;

  DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
               << " la=" << LogAlign
               << " kb=" << KnownBits
               << " up=" << UPad << '\n');

  // This could point off the end of the block if we've already got constant
  // pool entries following this block; only the last one is in the water list.
  // Back past any possible branches (allow for a conditional and a maximally
  // long unconditional).
  if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
    // Ensure BaseInsertOffset is larger than the offset of the instruction
    // following UserMI so that the loop which searches for the split point
    // iterates at least once.
    BaseInsertOffset =
        std::max(UserBBI.postOffset() - UPad - 8,
                 UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
    DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
  }
  // First guess at where the island will end: branch + padding + this CPE.
  unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
    CPEMI->getOperand(2).getImm();
  MachineBasicBlock::iterator MI = UserMI;
  ++MI;
  unsigned CPUIndex = CPUserIndex+1;
  unsigned NumCPUsers = CPUsers.size();
  MachineInstr *LastIT = nullptr;
  // Walk forward from UserMI to the candidate split point, backing the split
  // up whenever an intervening CP user could not reach the planned island.
  for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
       Offset < BaseInsertOffset;
       Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
    assert(MI != UserMBB->end() && "Fell off end of block");
    if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
      CPUser &U = CPUsers[CPUIndex];
      if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
        // Shift insertion point by one unit of alignment so it is within
        // reach.
        BaseInsertOffset -= 1u << LogAlign;
        EndInsertOffset  -= 1u << LogAlign;
      }
      // This is overly conservative, as we don't account for CPEMIs being
      // reused within the block, but it doesn't matter much.  Also assume CPEs
      // are added in order with alignment padding.  We may eventually be able
      // to pack the aligned CPEs better.
      EndInsertOffset += U.CPEMI->getOperand(2).getImm();
      CPUIndex++;
    }

    // Remember the last IT instruction.
    if (MI->getOpcode() == ARM::t2IT)
      LastIT = &*MI;
  }

  --MI;

  // Avoid splitting an IT block.
  if (LastIT) {
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
    if (CC != ARMCC::AL)
      // MI is predicated, i.e. inside an IT block: split before the IT
      // instruction instead.
      MI = LastIT;
  }

  // We really must not split an IT block.
  DEBUG(unsigned PredReg;
        assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));

  NewMBB = splitBlockBeforeInstr(&*MI);
}
1424
1425
/// handleConstantPoolUser - Analyze the specified user, checking to see if it
1426
/// is out-of-range.  If so, pick up the constant pool value and move it some
1427
/// place in-range.  Return true if we changed any addresses (thus must run
1428
/// another pass of branch lengthening), false otherwise.
1429
bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
                                                bool CloserWater) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;
  unsigned CPI = getCombinedIndex(CPEMI);
  unsigned Size = CPEMI->getOperand(2).getImm();
  // Compute this only once, it's expensive.
  unsigned UserOffset = getUserOffset(U);

  // See if the current entry is within range, or there is a clone of it
  // in range.  Result 1 means an entry is already reachable (nothing to do);
  // result 2 means the user was redirected to an existing clone (changed).
  int result = findInRangeCPEntry(U, UserOffset);
  if (result==1) return false;
  else if (result==2) return true;

  // No existing clone of this CPE is within range.
  // We will be generating a new clone.  Get a UID for it.
  unsigned ID = AFI->createPICLabelUId();

  // Look for water where we can place this CPE.
  MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
  MachineBasicBlock *NewMBB;
  water_iterator IP;
  if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
    DEBUG(dbgs() << "Found water in range\n");
    MachineBasicBlock *WaterBB = *IP;

    // If the original WaterList entry was "new water" on this iteration,
    // propagate that to the new island.  This is just keeping NewWaterList
    // updated to match the WaterList, which will be updated below.
    if (NewWaterList.erase(WaterBB))
      NewWaterList.insert(NewIsland);

    // The new CPE goes before the following block (NewMBB).
    NewMBB = &*++WaterBB->getIterator();
  } else {
    // No water found.
    DEBUG(dbgs() << "No water found\n");
    createNewWater(CPUserIndex, UserOffset, NewMBB);

    // splitBlockBeforeInstr adds to WaterList, which is important when it is
    // called while handling branches so that the water will be seen on the
    // next iteration for constant pools, but in this context, we don't want
    // it.  Check for this so it will be removed from the WaterList.
    // Also remove any entry from NewWaterList.
    MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
    IP = find(WaterList, WaterBB);
    if (IP != WaterList.end())
      NewWaterList.erase(WaterBB);

    // We are adding new water.  Update NewWaterList.
    NewWaterList.insert(NewIsland);
  }

  // Remove the original WaterList entry; we want subsequent insertions in
  // this vicinity to go after the one we're about to insert.  This
  // considerably reduces the number of times we have to move the same CPE
  // more than once and is also important to ensure the algorithm terminates.
  if (IP != WaterList.end())
    WaterList.erase(IP);

  // Okay, we know we can put an island before NewMBB now, do it!
  MF->insert(NewMBB->getIterator(), NewIsland);

  // Update internal data structures to account for the newly inserted MBB.
  updateForInsertedWaterBlock(NewIsland);

  // Now that we have an island to add the CPE to, clone the original CPE and
  // add it to the island.
  U.HighWaterMark = NewIsland;
  U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
                .addImm(ID)
                .add(CPEMI->getOperand(1))
                .addImm(Size);
  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
  ++NumCPEs;

  // Decrement the old entry, and remove it if refcount becomes 0.
  decrementCPEReferenceCount(CPI, CPEMI);

  // Mark the basic block as aligned as required by the const-pool entry.
  NewIsland->setAlignment(getCPELogAlign(U.CPEMI));

  // Increase the size of the island block to account for the new entry.
  BBInfo[NewIsland->getNumber()].Size += Size;
  adjustBBOffsetsAfter(&*--NewIsland->getIterator());

  // Finally, change the CPI in the instruction operand to be ID.
  for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
    if (UserMI->getOperand(i).isCPI()) {
      UserMI->getOperand(i).setIndex(ID);
      break;
    }

  DEBUG(dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
        << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));

  return true;
}
1529
1530
/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
1531
/// sizes and offsets of impacted basic blocks.
1532
711
void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
  MachineBasicBlock *CPEBB = CPEMI->getParent();
  // Operand 2 of a CPE pseudo holds the entry's size in bytes.
  unsigned Size = CPEMI->getOperand(2).getImm();
  CPEMI->eraseFromParent();
  BBInfo[CPEBB->getNumber()].Size -= Size;
  // All succeeding offsets have the current size value added in, fix this.
  if (CPEBB->empty()) {
    BBInfo[CPEBB->getNumber()].Size = 0;

    // This block no longer needs to be aligned.
    CPEBB->setAlignment(0);
  } else
    // Entries are sorted by descending alignment, so realign from the front.
    CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));

  // Propagate the size change to all following blocks' offsets.
  adjustBBOffsetsAfter(CPEBB);
  // An island has only one predecessor BB and one successor BB. Check if
  // this BB's predecessor jumps directly to this BB's successor. This
  // shouldn't happen currently.
  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
  // FIXME: remove the empty blocks after all the work is done?
}
1554
1555
/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
1556
/// are zero.
1557
17.0k
bool ARMConstantIslands::removeUnusedCPEntries() {
1558
17.0k
  unsigned MadeChange = false;
1559
21.5k
  for (unsigned i = 0, e = CPEntries.size(); 
i != e21.5k
;
++i4.56k
) {
1560
4.56k
      std::vector<CPEntry> &CPEs = CPEntries[i];
1561
9.12k
      for (unsigned j = 0, ee = CPEs.size(); 
j != ee9.12k
;
++j4.56k
) {
1562
4.56k
        if (
CPEs[j].RefCount == 0 && 4.56k
CPEs[j].CPEMI6
) {
1563
6
          removeDeadCPEMI(CPEs[j].CPEMI);
1564
6
          CPEs[j].CPEMI = nullptr;
1565
6
          MadeChange = true;
1566
6
        }
1567
4.56k
      }
1568
4.56k
  }
1569
17.0k
  return MadeChange;
1570
17.0k
}
1571
1572
/// isBBInRange - Returns true if the distance between specific MI and
1573
/// specific BB can fit in MI's displacement field.
1574
bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
1575
53.6k
                                     unsigned MaxDisp) {
1576
53.6k
  unsigned PCAdj      = isThumb ? 
452.7k
:
8975
;
1577
53.6k
  unsigned BrOffset   = getOffsetOf(MI) + PCAdj;
1578
53.6k
  unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
1579
53.6k
1580
53.6k
  DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
1581
53.6k
               << " from BB#" << MI->getParent()->getNumber()
1582
53.6k
               << " max delta=" << MaxDisp
1583
53.6k
               << " from " << getOffsetOf(MI) << " to " << DestOffset
1584
53.6k
               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
1585
53.6k
1586
53.6k
  if (
BrOffset <= DestOffset53.6k
) {
1587
40.6k
    // Branch before the Dest.
1588
40.6k
    if (DestOffset-BrOffset <= MaxDisp)
1589
38.7k
      return true;
1590
13.0k
  } else {
1591
13.0k
    if (BrOffset-DestOffset <= MaxDisp)
1592
12.6k
      return true;
1593
2.26k
  }
1594
2.26k
  return false;
1595
2.26k
}
1596
1597
/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
1598
/// away to fit in its displacement field.
1599
29.5k
/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
  MachineInstr *BrMI = Br.MI;
  MachineBasicBlock *TargetBB = BrMI->getOperand(0).getMBB();

  // Nothing to do when the destination is already reachable.
  if (isBBInRange(BrMI, TargetBB, Br.MaxDisp))
    return false;

  // Out of range: dispatch on the branch kind.
  return Br.isCond ? fixupConditionalBr(Br) : fixupUnconditionalBr(Br);
}
1611
1612
/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
1613
/// too far away to fit in its displacement field. If the LR register has been
1614
/// spilled in the epilogue, then we can use BL to implement a far jump.
1615
/// Otherwise, add an intermediate branch instruction to a branch.
1616
bool
ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *MBB = MI->getParent();
  // Only Thumb1 ever calls this path; tBfar is a Thumb1 pseudo.
  if (!isThumb1)
    llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");

  // Use BL to implement far jump.
  Br.MaxDisp = (1 << 21) * 2;
  MI->setDesc(TII->get(ARM::tBfar));
  // tBfar is 2 bytes larger than the tB it replaces.
  BBInfo[MBB->getNumber()].Size += 2;
  adjustBBOffsetsAfter(MBB);
  // Record that LR is clobbered so later passes can compensate.
  HasFarJump = true;
  ++NumUBrFixed;

  DEBUG(dbgs() << "  Changed B to long jump " << *MI);

  return true;
}
1635
1636
/// fixupConditionalBr - Fix up a conditional branch whose destination is too
1637
/// far away to fit in its displacement field. It is converted to an inverse
1638
/// conditional branch + an unconditional branch to the destination.
1639
bool
ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // blt L1
  // =>
  // bge L2
  // b   L1
  // L2:
  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
  CC = ARMCC::getOppositeCondition(CC);
  unsigned CCReg = MI->getOperand(2).getReg();

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI->getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);

  ++NumCBrFixed;
  if (BMI != MI) {
    // MI is not the last instruction; see if the block ends with an
    // unconditional branch immediately after MI.
    if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
        BMI->getOpcode() == Br.UncondBr) {
      // Last MI in the BB is an unconditional branch. Can we simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
      if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
        DEBUG(dbgs() << "  Invert Bcc condition and swap its destination with "
                     << *BMI);
        BMI->getOperand(0).setMBB(DestBB);
        MI->getOperand(0).setMBB(NewDest);
        MI->getOperand(1).setImm(CC);
        return true;
      }
    }
  }

  if (NeedSplit) {
    splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BBInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
  }
  MachineBasicBlock *NextBB = &*++MBB->getIterator();

  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
               << " also invert condition and change dest. to BB#"
               << NextBB->getNumber() << "\n");

  // Insert a new conditional branch and a new unconditional branch.
  // Also update the ImmBranch as well as adding a new entry for the new branch.
  BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
    .addMBB(NextBB).addImm(CC).addReg(CCReg);
  Br.MI = &MBB->back();
  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
  if (isThumb)
    // Thumb branches carry an explicit AL predicate operand.
    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
        .addMBB(DestBB)
        .add(predOps(ARMCC::AL));
  else
    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));

  // Remove the old conditional branch.  It may or may not still be in MBB.
  BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
  MI->eraseFromParent();
  adjustBBOffsetsAfter(MBB);
  return true;
}
1722
1723
/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
1724
/// LR / restores LR to pc. FIXME: This is done here because it's only possible
1725
/// to do this if tBfar is not used.
1726
1
bool ARMConstantIslands::undoLRSpillRestore() {
1727
1
  bool MadeChange = false;
1728
3
  for (unsigned i = 0, e = PushPopMIs.size(); 
i != e3
;
++i2
) {
1729
2
    MachineInstr *MI = PushPopMIs[i];
1730
2
    // First two operands are predicates.
1731
2
    if (MI->getOpcode() == ARM::tPOP_RET &&
1732
1
        MI->getOperand(2).getReg() == ARM::PC &&
1733
2
        
MI->getNumExplicitOperands() == 31
) {
1734
1
      // Create the new insn and copy the predicate from the old.
1735
1
      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
1736
1
          .add(MI->getOperand(0))
1737
1
          .add(MI->getOperand(1));
1738
1
      MI->eraseFromParent();
1739
1
      MadeChange = true;
1740
2
    } else 
if (1
MI->getOpcode() == ARM::tPUSH &&
1741
1
               MI->getOperand(2).getReg() == ARM::LR &&
1742
1
               
MI->getNumExplicitOperands() == 31
) {
1743
1
      // Just remove the push.
1744
1
      MI->eraseFromParent();
1745
1
      MadeChange = true;
1746
1
    }
1747
2
  }
1748
1
  return MadeChange;
1749
1
}
1750
1751
8.63k
// Try to shrink 32-bit Thumb2 constant-pool users (ADR/LDR) to their 16-bit
// Thumb1 equivalents when the entry is close enough and the destination is a
// low register.  Returns true if any instruction was changed.
bool ARMConstantIslands::optimizeThumb2Instructions() {
  bool MadeChange = false;

  // Shrink ADR and LDR from constantpool.
  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
    CPUser &U = CPUsers[i];
    unsigned Opcode = U.MI->getOpcode();
    unsigned NewOpc = 0;
    unsigned Scale = 1;
    unsigned Bits = 0;
    switch (Opcode) {
    default: break;
    case ARM::t2LEApcrel:
      // Narrow encodings can only write low registers.
      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
        NewOpc = ARM::tLEApcrel;
        Bits = 8;
        Scale = 4;
      }
      break;
    case ARM::t2LDRpci:
      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
        NewOpc = ARM::tLDRpci;
        Bits = 8;
        Scale = 4;
      }
      break;
    }

    if (!NewOpc)
      continue;

    unsigned UserOffset = getUserOffset(U);
    // Maximum reachable offset for the narrow encoding: 8-bit immediate
    // scaled by 4.
    unsigned MaxOffs = ((1 << Bits) - 1) * Scale;

    // Be conservative with inline asm.
    if (!U.KnownAlignment)
      MaxOffs -= 2;

    // FIXME: Check if offset is multiple of scale if scale is not 4.
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
      DEBUG(dbgs() << "Shrink: " << *U.MI);
      U.MI->setDesc(TII->get(NewOpc));
      MachineBasicBlock *MBB = U.MI->getParent();
      // The narrow form is 2 bytes shorter; fix up block size and offsets.
      BBInfo[MBB->getNumber()].Size -= 2;
      adjustBBOffsetsAfter(MBB);
      ++NumT2CPShrunk;
      MadeChange = true;
    }
  }

  return MadeChange;
}
1803
1804
8.72k
// Shrink 32-bit Thumb2 branches to 16-bit forms where in range, and fold
// "cmp rN, #0; beq/bne" pairs into cbz/cbnz.  Returns true on any change.
bool ARMConstantIslands::optimizeThumb2Branches() {
  bool MadeChange = false;

  // The order in which branches appear in ImmBranches is approximately their
  // order within the function body. By visiting later branches first, we reduce
  // the distance between earlier forward branches and their targets, making it
  // more likely that the cbn?z optimization, which can only apply to forward
  // branches, will succeed.
  for (unsigned i = ImmBranches.size(); i != 0; --i) {
    ImmBranch &Br = ImmBranches[i-1];
    unsigned Opcode = Br.MI->getOpcode();
    unsigned NewOpc = 0;
    unsigned Scale = 1;
    unsigned Bits = 0;
    switch (Opcode) {
    default: break;
    case ARM::t2B:
      NewOpc = ARM::tB;
      Bits = 11;
      Scale = 2;
      break;
    case ARM::t2Bcc:
      NewOpc = ARM::tBcc;
      Bits = 8;
      Scale = 2;
      break;
    }
    if (NewOpc) {
      // Signed displacement: (Bits-1) magnitude bits, scaled by 2.
      unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
      MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
      if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
        DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
        Br.MI->setDesc(TII->get(NewOpc));
        MachineBasicBlock *MBB = Br.MI->getParent();
        // Narrow form is 2 bytes shorter; fix up sizes and offsets.
        BBInfo[MBB->getNumber()].Size -= 2;
        adjustBBOffsetsAfter(MBB);
        ++NumT2BrShrunk;
        MadeChange = true;
      }
    }

    // Re-read the opcode: the shrink above may have rewritten it to tBcc.
    Opcode = Br.MI->getOpcode();
    if (Opcode != ARM::tBcc)
      continue;

    // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
    // so this transformation is not safe.
    if (!Br.MI->killsRegister(ARM::CPSR))
      continue;

    // cbz/cbnz only encode EQ/NE against zero.
    NewOpc = 0;
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
    if (Pred == ARMCC::EQ)
      NewOpc = ARM::tCBZ;
    else if (Pred == ARMCC::NE)
      NewOpc = ARM::tCBNZ;
    if (!NewOpc)
      continue;
    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
    // Check if the distance is within 126. Subtract starting offset by 2
    // because the cmp will be eliminated.
    unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
    unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
    if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
      MachineBasicBlock::iterator CmpMI = Br.MI;
      if (CmpMI != Br.MI->getParent()->begin()) {
        // Look at the instruction immediately before the branch for a
        // foldable "cmp rN, #0".
        --CmpMI;
        if (CmpMI->getOpcode() == ARM::tCMPi8) {
          unsigned Reg = CmpMI->getOperand(0).getReg();
          Pred = getInstrPredicate(*CmpMI, PredReg);
          if (Pred == ARMCC::AL &&
              CmpMI->getOperand(1).getImm() == 0 &&
              isARMLowRegister(Reg)) {
            MachineBasicBlock *MBB = Br.MI->getParent();
            DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
            MachineInstr *NewBR =
              BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
              .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
            // Two 2-byte instructions become one 4-byte cb(n)z: net -2 bytes.
            CmpMI->eraseFromParent();
            Br.MI->eraseFromParent();
            Br.MI = NewBR;
            BBInfo[MBB->getNumber()].Size -= 2;
            adjustBBOffsetsAfter(MBB);
            ++NumCBZ;
            MadeChange = true;
          }
        }
      }
    }
  }

  return MadeChange;
}
1898
1899
// Returns true if I is a "t2ADDrs EntryReg, BaseReg, ..." — the simple
// jump-table index computation we know how to fold away.
static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
                              unsigned BaseReg) {
  // FIXME: what about CC and IdxReg?
  return I.getOpcode() == ARM::t2ADDrs &&
         I.getOperand(0).getReg() == EntryReg &&
         I.getOperand(1).getReg() == BaseReg;
}
1913
1914
/// \brief While trying to form a TBB/TBH instruction, we may (if the table
/// doesn't immediately follow the BR_JT) need access to the start of the
/// jump-table. We know one instruction that produces such a register; this
/// function works out whether that definition can be preserved to the BR_JT,
/// possibly by removing an intervening addition (which is usually needed to
/// calculate the actual entry to jump to).
///
/// \param JumpMI        the t2BR_JT terminator using the table entry.
/// \param LEAMI         the instruction defining the table base register.
/// \param DeadSize      [out] incremented by the size of any erased add.
/// \param CanDeleteLEA  [out] false if BaseReg has other uses before JumpMI.
/// \param BaseRegKill   [out] true if some intervening use kills BaseReg.
/// \returns true if BaseReg's value survives up to JumpMI (so the TBB/TBH can
///          use it); may erase a removable t2ADDrs as a side effect.
bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
                                              MachineInstr *LEAMI,
                                              unsigned &DeadSize,
                                              bool &CanDeleteLEA,
                                              bool &BaseRegKill) {
  // Only reason about the simple single-block case.
  if (JumpMI->getParent() != LEAMI->getParent())
    return false;

  // Now we hope that we have at least these instructions in the basic block:
  //     BaseReg = t2LEA ...
  //     [...]
  //     EntryReg = t2ADDrs BaseReg, ...
  //     [...]
  //     t2BR_JT EntryReg
  //
  // We have to be very conservative about what we recognise here though. The
  // main perturbing factors to watch out for are:
  //    + Spills at any point in the chain: not direct problems but we would
  //      expect a blocking Def of the spilled register so in practice what we
  //      can do is limited.
  //    + EntryReg == BaseReg: this is the one situation we should allow a Def
  //      of BaseReg, but only if the t2ADDrs can be removed.
  //    + Some instruction other than t2ADDrs computing the entry. Not seen in
  //      the wild, but we should be careful.
  unsigned EntryReg = JumpMI->getOperand(0).getReg();
  unsigned BaseReg = LEAMI->getOperand(0).getReg();

  CanDeleteLEA = true;
  BaseRegKill = false;
  MachineInstr *RemovableAdd = nullptr;
  MachineBasicBlock::iterator I(LEAMI);
  // Phase 1: scan (LEAMI, JumpMI) for the recognised entry calculation,
  // bailing out if anything else clobbers BaseReg first.
  for (++I; &*I != JumpMI; ++I) {
    if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
      RemovableAdd = &*I;
      break;
    }

    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
      const MachineOperand &MO = I->getOperand(K);
      if (!MO.isReg() || !MO.getReg())
        continue;
      // Any other def of BaseReg before the add kills the value we need.
      if (MO.isDef() && MO.getReg() == BaseReg)
        return false;
      // A use means the LEA's result is still needed, so the LEA itself
      // cannot be deleted later.
      if (MO.isUse() && MO.getReg() == BaseReg) {
        BaseRegKill = BaseRegKill || MO.isKill();
        CanDeleteLEA = false;
      }
    }
  }

  // No entry calculation found: BaseReg survived to JumpMI unchanged.
  if (!RemovableAdd)
    return true;

  // Check the add really is removable, and that nothing else in the block
  // clobbers BaseReg.
  // Phase 2: continue from just after the add up to JumpMI.
  for (++I; &*I != JumpMI; ++I) {
    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
      const MachineOperand &MO = I->getOperand(K);
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (MO.isDef() && MO.getReg() == BaseReg)
        return false;
      // A later use of EntryReg means the add's result is live; keep it.
      if (MO.isUse() && MO.getReg() == EntryReg)
        RemovableAdd = nullptr;
    }
  }

  if (RemovableAdd) {
    RemovableAdd->eraseFromParent();
    DeadSize += isThumb2 ? 4 : 2;
  } else if (BaseReg == EntryReg) {
    // The add wasn't removable, but clobbered the base for the TBB. So we can't
    // preserve it.
    return false;
  }

  // We reached the end of the block without seeing another definition of
  // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
  // used in the TBB/TBH if necessary.
  return true;
}
2001
2002
/// \brief Returns whether CPEMI is the first instruction in the block
2003
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
2004
/// we can switch the first register to PC and usually remove the address
2005
/// calculation that preceded it.
2006
695
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
2007
695
  MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
2008
695
  MachineFunction *MF = MBB->getParent();
2009
695
  ++MBB;
2010
695
2011
695
  return MBB != MF->end() && MBB->begin() != MBB->end() &&
2012
695
         &*MBB->begin() == CPEMI;
2013
695
}
2014
2015
static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
2016
                                         MachineInstr *JumpMI,
2017
337
                                         unsigned &DeadSize) {
2018
337
  // Remove a dead add between the LEA and JT, which used to compute EntryReg,
2019
337
  // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
2020
337
  // and is not clobbered / used.
2021
337
  MachineInstr *RemovableAdd = nullptr;
2022
337
  unsigned EntryReg = JumpMI->getOperand(0).getReg();
2023
337
2024
337
  // Find the last ADD to set EntryReg
2025
337
  MachineBasicBlock::iterator I(LEAMI);
2026
742
  for (++I; 
&*I != JumpMI742
;
++I405
) {
2027
405
    if (
I->getOpcode() == ARM::t2ADDrs && 405
I->getOperand(0).getReg() == EntryReg1
)
2028
1
      RemovableAdd = &*I;
2029
405
  }
2030
337
2031
337
  if (!RemovableAdd)
2032
336
    return;
2033
1
2034
1
  // Ensure EntryReg is not clobbered or used.
2035
1
  MachineBasicBlock::iterator J(RemovableAdd);
2036
3
  for (++J; 
&*J != JumpMI3
;
++J2
) {
2037
11
    for (unsigned K = 0, E = J->getNumOperands(); 
K != E11
;
++K9
) {
2038
9
      const MachineOperand &MO = J->getOperand(K);
2039
9
      if (
!MO.isReg() || 9
!MO.getReg()5
)
2040
5
        continue;
2041
4
      
if (4
MO.isDef() && 4
MO.getReg() == EntryReg2
)
2042
0
        return;
2043
4
      
if (4
MO.isUse() && 4
MO.getReg() == EntryReg2
)
2044
0
        return;
2045
9
    }
2046
2
  }
2047
1
2048
1
  
DEBUG1
(dbgs() << "Removing Dead Add: " << *RemovableAdd);
2049
1
  RemovableAdd->eraseFromParent();
2050
1
  DeadSize += 4;
2051
1
}
2052
2053
static bool registerDefinedBetween(unsigned Reg,
2054
                                   MachineBasicBlock::iterator From,
2055
                                   MachineBasicBlock::iterator To,
2056
34
                                   const TargetRegisterInfo *TRI) {
2057
59
  for (auto I = From; 
I != To59
;
++I25
)
2058
26
    
if (26
I->modifiesRegister(Reg, TRI)26
)
2059
1
      return true;
2060
33
  return false;
2061
34
}
2062
2063
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
///
/// For each recorded t2BR_JT, this checks whether every destination lies at a
/// non-negative offset small enough for a byte (TBB) or half-word (TBH)
/// table, then rewrites the branch, retags the jump-table pseudo, and deletes
/// the now-dead address-calculation instructions (LEA / shift / load / add).
/// \returns true if any jump table was shrunk.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
  bool MadeChange = false;

  // FIXME: After the tables are shrunk, can we get rid some of the
  // constantpool tables?
  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  if (!MJTI) return false;

  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
    MachineInstr *MI = T2JumpTables[i];
    const MCInstrDesc &MCID = MI->getDesc();
    unsigned NumOps = MCID.getNumOperands();
    // The jump-table index operand is last, preceded by the predicate
    // operands when the instruction is predicable.
    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
    MachineOperand JTOP = MI->getOperand(JTOpIdx);
    unsigned JTI = JTOP.getIndex();
    assert(JTI < JT.size());

    // Decide whether every destination fits a byte-scaled (TBB) or
    // halfword-scaled (TBH) table. Offsets are measured from the branch
    // address + 4 (PC-relative in Thumb).
    bool ByteOk = true;
    bool HalfWordOk = true;
    unsigned JTOffset = getOffsetOf(MI) + 4;
    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
    for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
      MachineBasicBlock *MBB = JTBBs[j];
      unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
      // Negative offset is not ok. FIXME: We should change BB layout to make
      // sure all the branches are forward.
      // (Unsigned subtraction makes a backwards branch wrap to a huge value,
      // which fails both limits below.)
      if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
        ByteOk = false;
      unsigned TBHLimit = ((1<<16)-1)*2;
      if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
        HalfWordOk = false;
      if (!ByteOk && !HalfWordOk)
        break;
    }

    if (!ByteOk && !HalfWordOk)
      continue;

    CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
    MachineBasicBlock *MBB = MI->getParent();
    if (!MI->getOperand(0).isKill()) // FIXME: needed now?
      continue;

    // Bytes freed by deleting address-calculation instructions below.
    unsigned DeadSize = 0;
    bool CanDeleteLEA = false;
    bool BaseRegKill = false;

    unsigned IdxReg = ~0U;
    bool IdxRegKill = true;
    if (isThumb2) {
      IdxReg = MI->getOperand(1).getReg();
      IdxRegKill = MI->getOperand(1).isKill();

      bool PreservedBaseReg =
        preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
      // If the table isn't laid out right after the branch we need the base
      // register to survive to the TB[BH]; otherwise give up on this table.
      if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
        continue;
    } else {
      // We're in thumb-1 mode, so we must have something like:
      //   %idx = tLSLri %idx, 2
      //   %base = tLEApcrelJT
      //   %t = tLDRr %idx, %base
      unsigned BaseReg = User.MI->getOperand(0).getReg();

      if (User.MI->getIterator() == User.MI->getParent()->begin())
        continue;
      MachineInstr *Shift = User.MI->getPrevNode();
      if (Shift->getOpcode() != ARM::tLSLri ||
          Shift->getOperand(3).getImm() != 2 ||
          !Shift->getOperand(2).isKill())
        continue;
      IdxReg = Shift->getOperand(2).getReg();
      unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();

      // It's important that IdxReg is live until the actual TBB/TBH. Most of
      // the range is checked later, but the LEA might still clobber it and not
      // actually get removed.
      if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
        continue;

      MachineInstr *Load = User.MI->getNextNode();
      if (Load->getOpcode() != ARM::tLDRr)
        continue;
      if (Load->getOperand(1).getReg() != ShiftedIdxReg ||
          Load->getOperand(2).getReg() != BaseReg ||
          !Load->getOperand(1).isKill())
        continue;

      // If we're in PIC mode, there should be another ADD following.
      auto *TRI = STI->getRegisterInfo();

      // %base cannot be redefined after the load as it will appear before
      // TBB/TBH like:
      //      %base =
      //      %base =
      //      tBB %base, %idx
      if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
        continue;

      if (isPositionIndependentOrROPI) {
        MachineInstr *Add = Load->getNextNode();
        if (Add->getOpcode() != ARM::tADDrr ||
            Add->getOperand(2).getReg() != Load->getOperand(0).getReg() ||
            Add->getOperand(3).getReg() != BaseReg ||
            !Add->getOperand(2).isKill())
          continue;
        if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
          continue;
        if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
          // IdxReg gets redefined in the middle of the sequence.
          continue;
        Add->eraseFromParent();
        DeadSize += 2;
      } else {
        if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
          continue;
        if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
          // IdxReg gets redefined in the middle of the sequence.
          continue;
      }

      // Now safe to delete the load and lsl. The LEA will be removed later.
      CanDeleteLEA = true;
      Shift->eraseFromParent();
      Load->eraseFromParent();
      DeadSize += 4;
    }

    DEBUG(dbgs() << "Shrink JT: " << *MI);
    MachineInstr *CPEMI = User.CPEMI;
    unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
    if (!isThumb2)
      Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;

    // Build the replacement TB[BH] in place of the original branch.
    MachineBasicBlock::iterator MI_JT = MI;
    MachineInstr *NewJTMI =
        BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
            .addReg(User.MI->getOperand(0).getReg(),
                    getKillRegState(BaseRegKill))
            .addReg(IdxReg, getKillRegState(IdxRegKill))
            .addJumpTableIndex(JTI, JTOP.getTargetFlags())
            .addImm(CPEMI->getOperand(0).getImm());
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);

    // Retag the jump-table pseudo so later size accounting matches.
    unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
    CPEMI->setDesc(TII->get(JTOpc));

    if (jumpTableFollowsTB(MI, User.CPEMI)) {
      // Table is adjacent: index directly off PC and drop the LEA if we can.
      NewJTMI->getOperand(0).setReg(ARM::PC);
      NewJTMI->getOperand(0).setIsKill(false);

      if (CanDeleteLEA) {
        if (isThumb2)
          RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);

        User.MI->eraseFromParent();
        DeadSize += isThumb2 ? 4 : 2;

        // The LEA was eliminated, the TBB instruction becomes the only new user
        // of the jump table.
        User.MI = NewJTMI;
        User.MaxDisp = 4;
        User.NegOk = false;
        User.IsSoImm = false;
        User.KnownAlignment = false;
      } else {
        // The LEA couldn't be eliminated, so we must add another CPUser to
        // record the TBB or TBH use.
        int CPEntryIdx = JumpTableEntryIndices[JTI];
        auto &CPEs = CPEntries[CPEntryIdx];
        auto Entry =
            find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; });
        ++Entry->RefCount;
        CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
      }
    }

    // Account for the size delta and ripple offsets through later blocks.
    unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI);
    unsigned OrigSize = TII->getInstSizeInBytes(*MI);
    MI->eraseFromParent();

    int Delta = OrigSize - NewSize + DeadSize;
    BBInfo[MBB->getNumber()].Size -= Delta;
    adjustBBOffsetsAfter(MBB);

    ++NumTBs;
    MadeChange = true;
  }

  return MadeChange;
}
2257
2258
/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
///
/// For every destination block that is numbered before its t2BR_JT, either
/// move the block after the branch or insert a forwarding trampoline block
/// (see adjustJTTargetBlockForward). \returns true if the layout changed.
bool ARMConstantIslands::reorderThumb2JumpTables() {
  bool MadeChange = false;

  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  if (!MJTI) return false;

  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
    MachineInstr *MI = T2JumpTables[i];
    const MCInstrDesc &MCID = MI->getDesc();
    unsigned NumOps = MCID.getNumOperands();
    // Jump-table index is the last operand, after any predicate operands.
    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
    MachineOperand JTOP = MI->getOperand(JTOpIdx);
    unsigned JTI = JTOP.getIndex();
    assert(JTI < JT.size());

    // We prefer if target blocks for the jump table come after the jump
    // instruction so we can use TB[BH]. Loop through the target blocks
    // and try to adjust them such that that's true.
    int JTNumber = MI->getParent()->getNumber();
    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
    for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
      MachineBasicBlock *MBB = JTBBs[j];
      int DTNumber = MBB->getNumber();

      if (DTNumber < JTNumber) {
        // The destination precedes the switch. Try to move the block forward
        // so we have a positive offset.
        MachineBasicBlock *NewBB =
          adjustJTTargetBlockForward(MBB, MI->getParent());
        // NewBB is non-null only when a trampoline block was created; a null
        // return means the block itself was moved, so the table entry is
        // already correct. Either way the layout changed.
        if (NewBB)
          MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}
2299
2300
/// Try to make jump-table destination \p BB reachable with a forward branch
/// from the table in \p JTBB.
///
/// \returns nullptr if \p BB itself was moved after \p JTBB (the jump-table
/// entry stays valid), or a newly created trampoline block that branches to
/// \p BB (the caller must patch the jump table to point at it).
MachineBasicBlock *ARMConstantIslands::
adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
  // If the destination block is terminated by an unconditional branch,
  // try to move it; otherwise, create a new block following the jump
  // table that branches back to the actual target. This is a very simple
  // heuristic. FIXME: We can definitely improve it.
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  SmallVector<MachineOperand, 4> CondPrior;
  MachineFunction::iterator BBi = BB->getIterator();
  MachineFunction::iterator OldPrior = std::prev(BBi);

  // If the block terminator isn't analyzable, don't try to move the block
  bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);

  // If the block ends in an unconditional branch, move it. The prior block
  // has to have an analyzable terminator for us to move this one. Be paranoid
  // and make sure we're not trying to move the entry block of the function.
  if (!B && Cond.empty() && BB != &MF->front() &&
      !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
    BB->moveAfter(JTBB);
    // Both the block that used to precede BB and BB itself may now need
    // different fall-through/branch terminators.
    OldPrior->updateTerminator();
    BB->updateTerminator();
    // Update numbering to account for the block being moved.
    MF->RenumberBlocks();
    ++NumJTMoved;
    return nullptr;
  }

  // Create a new MBB for the code after the jump BB.
  MachineBasicBlock *NewBB =
    MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
  MachineFunction::iterator MBBI = ++JTBB->getIterator();
  MF->insert(MBBI, NewBB);

  // Add an unconditional branch from NewBB to BB.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond directly to anything in the source.
  if (isThumb2)
    BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
        .addMBB(BB)
        .add(predOps(ARMCC::AL));
  else
    BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
        .addMBB(BB)
        .add(predOps(ARMCC::AL));

  // Update internal data structures to account for the newly inserted MBB.
  MF->RenumberBlocks(NewBB);

  // Update the CFG.
  NewBB->addSuccessor(BB);
  JTBB->replaceSuccessor(BB, NewBB);

  ++NumJTInserted;
  return NewBB;
}
2357
2358
/// createARMConstantIslandPass - returns an instance of the constpool
/// island pass.
///
/// The caller (the ARM target's pass pipeline) takes ownership of the
/// returned FunctionPass.
FunctionPass *llvm::createARMConstantIslandPass() {
  return new ARMConstantIslands();
}
2363
2364
// Register the pass with LLVM's pass infrastructure under the command-line
// name "arm-cp-islands" (not a CFG-only, not an analysis pass).
INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
                false, false)