Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
/// \file
9
/// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions
10
/// into a conditional branch (B.cond), when the NZCV flags can be set for
11
/// "free".  This is preferred on targets that have more flexibility when
12
/// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming
13
/// all other variables are equal).  This can also reduce register pressure.
14
///
15
/// A few examples:
16
///
17
/// 1) add w8, w0, w1  -> cmn w0, w1             ; CMN is an alias of ADDS.
18
///    cbz w8, .LBB0_2 -> b.eq .LBB0_2
19
///
20
/// 2) add w8, w0, w1  -> adds w8, w0, w1        ; w8 has multiple uses.
21
///    cbz w8, .LBB1_2 -> b.eq .LBB1_2
22
///
23
/// 3) sub w8, w0, w1       -> subs w8, w0, w1   ; w8 has multiple uses.
24
///    tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
25
///
26
//===----------------------------------------------------------------------===//
27
28
#include "AArch64.h"
29
#include "AArch64Subtarget.h"
30
#include "llvm/CodeGen/MachineFunction.h"
31
#include "llvm/CodeGen/MachineFunctionPass.h"
32
#include "llvm/CodeGen/MachineInstrBuilder.h"
33
#include "llvm/CodeGen/MachineRegisterInfo.h"
34
#include "llvm/CodeGen/Passes.h"
35
#include "llvm/CodeGen/TargetInstrInfo.h"
36
#include "llvm/CodeGen/TargetRegisterInfo.h"
37
#include "llvm/CodeGen/TargetSubtargetInfo.h"
38
#include "llvm/Support/Debug.h"
39
#include "llvm/Support/raw_ostream.h"
40
41
using namespace llvm;
42
43
#define DEBUG_TYPE "aarch64-cond-br-tuning"
44
265k
#define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning"
45
46
namespace {
47
class AArch64CondBrTuning : public MachineFunctionPass {
48
  const AArch64InstrInfo *TII;
49
  const TargetRegisterInfo *TRI;
50
51
  MachineRegisterInfo *MRI;
52
53
public:
54
  static char ID;
55
8.61k
  AArch64CondBrTuning() : MachineFunctionPass(ID) {
56
8.61k
    initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry());
57
8.61k
  }
58
  void getAnalysisUsage(AnalysisUsage &AU) const override;
59
  bool runOnMachineFunction(MachineFunction &MF) override;
60
265k
  StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; }
61
62
private:
63
  MachineInstr *getOperandDef(const MachineOperand &MO);
64
  MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting);
65
  MachineInstr *convertToCondBr(MachineInstr &MI);
66
  bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI);
67
};
68
} // end anonymous namespace
69
70
char AArch64CondBrTuning::ID = 0;
71
72
INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning",
73
                AARCH64_CONDBR_TUNING_NAME, false, false)
74
75
8.57k
void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const {
76
8.57k
  AU.setPreservesCFG();
77
8.57k
  MachineFunctionPass::getAnalysisUsage(AU);
78
8.57k
}
79
80
644k
MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) {
81
644k
  if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
82
0
    return nullptr;
83
644k
  return MRI->getUniqueVRegDef(MO.getReg());
84
644k
}
85
86
MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
87
58.9k
                                                        bool IsFlagSetting) {
88
58.9k
  // If this is already the flag setting version of the instruction (e.g., SUBS)
89
58.9k
  // just make sure the implicit-def of NZCV isn't marked dead.
90
58.9k
  if (IsFlagSetting) {
91
16.3k
    for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
92
32.6k
         I != E; 
++I16.3k
) {
93
16.3k
      MachineOperand &MO = MI.getOperand(I);
94
16.3k
      if (MO.isReg() && MO.isDead() && 
MO.getReg() == AArch64::NZCV15.5k
)
95
15.5k
        MO.setIsDead(false);
96
16.3k
    }
97
16.3k
    return &MI;
98
16.3k
  }
99
42.5k
  bool Is64Bit;
100
42.5k
  unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit);
101
42.5k
  unsigned NewDestReg = MI.getOperand(0).getReg();
102
42.5k
  if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg()))
103
12.8k
    NewDestReg = Is64Bit ? 
AArch64::XZR5.04k
:
AArch64::WZR7.81k
;
104
42.5k
105
42.5k
  MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
106
42.5k
                                    TII->get(NewOpc), NewDestReg);
107
127k
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; 
++I85.2k
)
108
85.2k
    MIB.add(MI.getOperand(I));
109
42.5k
110
42.5k
  return MIB;
111
42.5k
}
112
113
58.9k
MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
114
58.9k
  AArch64CC::CondCode CC;
115
58.9k
  MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI);
116
58.9k
  switch (MI.getOpcode()) {
117
58.9k
  default:
118
0
    llvm_unreachable("Unexpected opcode!");
119
58.9k
120
58.9k
  case AArch64::CBZW:
121
33.5k
  case AArch64::CBZX:
122
33.5k
    CC = AArch64CC::EQ;
123
33.5k
    break;
124
33.5k
  case AArch64::CBNZW:
125
24.9k
  case AArch64::CBNZX:
126
24.9k
    CC = AArch64CC::NE;
127
24.9k
    break;
128
24.9k
  case AArch64::TBZW:
129
364
  case AArch64::TBZX:
130
364
    CC = AArch64CC::PL;
131
364
    break;
132
364
  case AArch64::TBNZW:
133
57
  case AArch64::TBNZX:
134
57
    CC = AArch64CC::MI;
135
57
    break;
136
58.9k
  }
137
58.9k
  return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
138
58.9k
      .addImm(CC)
139
58.9k
      .addMBB(TargetMBB);
140
58.9k
}
141
142
bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
143
644k
                                          MachineInstr &DefMI) {
144
644k
  // We don't want NZCV bits live across blocks.
145
644k
  if (MI.getParent() != DefMI.getParent())
146
124k
    return false;
147
520k
148
520k
  bool IsFlagSetting = true;
149
520k
  unsigned MIOpc = MI.getOpcode();
150
520k
  MachineInstr *NewCmp = nullptr, *NewBr = nullptr;
151
520k
  switch (DefMI.getOpcode()) {
152
520k
  default:
153
443k
    return false;
154
520k
  case AArch64::ADDWri:
155
33.4k
  case AArch64::ADDWrr:
156
33.4k
  case AArch64::ADDWrs:
157
33.4k
  case AArch64::ADDWrx:
158
33.4k
  case AArch64::ANDWri:
159
33.4k
  case AArch64::ANDWrr:
160
33.4k
  case AArch64::ANDWrs:
161
33.4k
  case AArch64::BICWrr:
162
33.4k
  case AArch64::BICWrs:
163
33.4k
  case AArch64::SUBWri:
164
33.4k
  case AArch64::SUBWrr:
165
33.4k
  case AArch64::SUBWrs:
166
33.4k
  case AArch64::SUBWrx:
167
33.4k
    IsFlagSetting = false;
168
33.4k
    LLVM_FALLTHROUGH;
169
34.4k
  case AArch64::ADDSWri:
170
34.4k
  case AArch64::ADDSWrr:
171
34.4k
  case AArch64::ADDSWrs:
172
34.4k
  case AArch64::ADDSWrx:
173
34.4k
  case AArch64::ANDSWri:
174
34.4k
  case AArch64::ANDSWrr:
175
34.4k
  case AArch64::ANDSWrs:
176
34.4k
  case AArch64::BICSWrr:
177
34.4k
  case AArch64::BICSWrs:
178
34.4k
  case AArch64::SUBSWri:
179
34.4k
  case AArch64::SUBSWrr:
180
34.4k
  case AArch64::SUBSWrs:
181
34.4k
  case AArch64::SUBSWrx:
182
34.4k
    switch (MIOpc) {
183
34.4k
    default:
184
0
      llvm_unreachable("Unexpected opcode!");
185
34.4k
186
34.4k
    case AArch64::CBZW:
187
34.4k
    case AArch64::CBNZW:
188
34.4k
    case AArch64::TBZW:
189
34.4k
    case AArch64::TBNZW:
190
34.4k
      // Check to see if the TBZ/TBNZ is checking the sign bit.
191
34.4k
      if ((MIOpc == AArch64::TBZW || 
MIOpc == AArch64::TBNZW34.3k
) &&
192
34.4k
          
MI.getOperand(1).getImm() != 3117.5k
)
193
17.4k
        return false;
194
16.9k
195
16.9k
      // There must not be any instruction between DefMI and MI that clobbers or
196
16.9k
      // reads NZCV.
197
16.9k
      MachineBasicBlock::iterator I(DefMI), E(MI);
198
28.4k
      for (I = std::next(I); I != E; 
++I11.4k
) {
199
11.6k
        if (I->modifiesRegister(AArch64::NZCV, TRI) ||
200
11.6k
            
I->readsRegister(AArch64::NZCV, TRI)11.4k
)
201
138
          return false;
202
11.6k
      }
203
16.9k
      
LLVM_DEBUG16.8k
(dbgs() << " Replacing instructions:\n ");
204
16.8k
      LLVM_DEBUG(DefMI.print(dbgs()));
205
16.8k
      LLVM_DEBUG(dbgs() << "    ");
206
16.8k
      LLVM_DEBUG(MI.print(dbgs()));
207
16.8k
208
16.8k
      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
209
16.8k
      NewBr = convertToCondBr(MI);
210
16.8k
      break;
211
16.8k
    }
212
16.8k
    break;
213
16.8k
214
26.7k
  case AArch64::ADDXri:
215
26.7k
  case AArch64::ADDXrr:
216
26.7k
  case AArch64::ADDXrs:
217
26.7k
  case AArch64::ADDXrx:
218
26.7k
  case AArch64::ANDXri:
219
26.7k
  case AArch64::ANDXrr:
220
26.7k
  case AArch64::ANDXrs:
221
26.7k
  case AArch64::BICXrr:
222
26.7k
  case AArch64::BICXrs:
223
26.7k
  case AArch64::SUBXri:
224
26.7k
  case AArch64::SUBXrr:
225
26.7k
  case AArch64::SUBXrs:
226
26.7k
  case AArch64::SUBXrx:
227
26.7k
    IsFlagSetting = false;
228
26.7k
    LLVM_FALLTHROUGH;
229
42.1k
  case AArch64::ADDSXri:
230
42.1k
  case AArch64::ADDSXrr:
231
42.1k
  case AArch64::ADDSXrs:
232
42.1k
  case AArch64::ADDSXrx:
233
42.1k
  case AArch64::ANDSXri:
234
42.1k
  case AArch64::ANDSXrr:
235
42.1k
  case AArch64::ANDSXrs:
236
42.1k
  case AArch64::BICSXrr:
237
42.1k
  case AArch64::BICSXrs:
238
42.1k
  case AArch64::SUBSXri:
239
42.1k
  case AArch64::SUBSXrr:
240
42.1k
  case AArch64::SUBSXrs:
241
42.1k
  case AArch64::SUBSXrx:
242
42.1k
    switch (MIOpc) {
243
42.1k
    default:
244
0
      llvm_unreachable("Unexpected opcode!");
245
42.1k
246
42.1k
    case AArch64::CBZX:
247
42.1k
    case AArch64::CBNZX:
248
42.1k
    case AArch64::TBZX:
249
42.1k
    case AArch64::TBNZX: {
250
42.1k
      // Check to see if the TBZ/TBNZ is checking the sign bit.
251
42.1k
      if ((MIOpc == AArch64::TBZX || 
MIOpc == AArch64::TBNZX41.8k
) &&
252
42.1k
          
MI.getOperand(1).getImm() != 63336
)
253
8
        return false;
254
42.1k
      // There must not be any instruction between DefMI and MI that clobbers or
255
42.1k
      // reads NZCV.
256
42.1k
      MachineBasicBlock::iterator I(DefMI), E(MI);
257
92.4k
      for (I = std::next(I); I != E; 
++I50.2k
) {
258
50.3k
        if (I->modifiesRegister(AArch64::NZCV, TRI) ||
259
50.3k
            
I->readsRegister(AArch64::NZCV, TRI)50.2k
)
260
73
          return false;
261
50.3k
      }
262
42.1k
      
LLVM_DEBUG42.1k
(dbgs() << " Replacing instructions:\n ");
263
42.1k
      LLVM_DEBUG(DefMI.print(dbgs()));
264
42.1k
      LLVM_DEBUG(dbgs() << "    ");
265
42.1k
      LLVM_DEBUG(MI.print(dbgs()));
266
42.1k
267
42.1k
      NewCmp = convertToFlagSetting(DefMI, IsFlagSetting);
268
42.1k
      NewBr = convertToCondBr(MI);
269
42.1k
      break;
270
42.1k
    }
271
42.1k
    }
272
42.1k
    break;
273
58.9k
  }
274
58.9k
  (void)NewCmp; (void)NewBr;
275
58.9k
  assert(NewCmp && NewBr && "Expected new instructions.");
276
58.9k
277
58.9k
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
278
58.9k
  LLVM_DEBUG(NewCmp->print(dbgs()));
279
58.9k
  LLVM_DEBUG(dbgs() << "    ");
280
58.9k
  LLVM_DEBUG(NewBr->print(dbgs()));
281
58.9k
282
58.9k
  // If this was a flag setting version of the instruction, we use the original
283
58.9k
  // instruction by just clearing the dead marker on the implicit-def of NZCV.
284
58.9k
  // Therefore, we should not erase this instruction.
285
58.9k
  if (!IsFlagSetting)
286
42.5k
    DefMI.eraseFromParent();
287
58.9k
  MI.eraseFromParent();
288
58.9k
  return true;
289
58.9k
}
290
291
257k
bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
292
257k
  if (skipFunction(MF.getFunction()))
293
16
    return false;
294
257k
295
257k
  LLVM_DEBUG(
296
257k
      dbgs() << "********** AArch64 Conditional Branch Tuning  **********\n"
297
257k
             << "********** Function: " << MF.getName() << '\n');
298
257k
299
257k
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
300
257k
  TRI = MF.getSubtarget().getRegisterInfo();
301
257k
  MRI = &MF.getRegInfo();
302
257k
303
257k
  bool Changed = false;
304
2.00M
  for (MachineBasicBlock &MBB : MF) {
305
2.00M
    bool LocalChange = false;
306
2.00M
    for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
307
2.00M
                                     E = MBB.end();
308
4.01M
         I != E; 
++I2.01M
) {
309
2.07M
      MachineInstr &MI = *I;
310
2.07M
      switch (MI.getOpcode()) {
311
2.07M
      default:
312
1.42M
        break;
313
2.07M
      case AArch64::CBZW:
314
644k
      case AArch64::CBZX:
315
644k
      case AArch64::CBNZW:
316
644k
      case AArch64::CBNZX:
317
644k
      case AArch64::TBZW:
318
644k
      case AArch64::TBZX:
319
644k
      case AArch64::TBNZW:
320
644k
      case AArch64::TBNZX:
321
644k
        MachineInstr *DefMI = getOperandDef(MI.getOperand(0));
322
644k
        LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI));
323
644k
        break;
324
2.07M
      }
325
2.07M
      // If the optimization was successful, we can't optimize any other
326
2.07M
      // branches because doing so would clobber the NZCV flags.
327
2.07M
      if (LocalChange) {
328
58.9k
        Changed = true;
329
58.9k
        break;
330
58.9k
      }
331
2.07M
    }
332
2.00M
  }
333
257k
  return Changed;
334
257k
}
335
336
8.61k
FunctionPass *llvm::createAArch64CondBrTuning() {
337
8.61k
  return new AArch64CondBrTuning();
338
8.61k
}