Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass tries to make consecutive compares of values use same operands to
11
// allow CSE pass to remove duplicated instructions.  For this it analyzes
12
// branches and adjusts comparisons with immediate values by converting:
13
//  * GE -> GT
14
//  * GT -> GE
15
//  * LT -> LE
16
//  * LE -> LT
17
// and adjusting immediate values appropriately.  It basically corrects two
18
// immediate values towards each other to make them equal.
19
//
20
// Consider the following example in C:
21
//
22
//   if ((a < 5 && ...) || (a > 5 && ...)) {
23
//        ~~~~~             ~~~~~
24
//          ^                 ^
25
//          x                 y
26
//
27
// Here both "x" and "y" expressions compare "a" with "5".  When "x" evaluates
28
// to "false", "y" can just check flags set by the first comparison.  As a
29
// result of the canonicalization employed by
30
// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
31
// code, assembly ends up in the form that is not CSE friendly:
32
//
33
//     ...
34
//     cmp      w8, #4
35
//     b.gt     .LBB0_3
36
//     ...
37
//   .LBB0_3:
38
//     cmp      w8, #6
39
//     b.lt     .LBB0_6
40
//     ...
41
//
42
// Same assembly after the pass:
43
//
44
//     ...
45
//     cmp      w8, #5
46
//     b.ge     .LBB0_3
47
//     ...
48
//   .LBB0_3:
49
//     cmp      w8, #5     // <-- CSE pass removes this instruction
50
//     b.le     .LBB0_6
51
//     ...
52
//
53
// Currently only SUBS and ADDS followed by b.?? are supported.
54
//
55
// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
56
// TODO: handle other conditional instructions (e.g. CSET)
57
// TODO: allow second branching to be anything if it doesn't require adjusting
58
//
59
//===----------------------------------------------------------------------===//
60
61
#include "AArch64.h"
62
#include "MCTargetDesc/AArch64AddressingModes.h"
63
#include "Utils/AArch64BaseInfo.h"
64
#include "llvm/ADT/ArrayRef.h"
65
#include "llvm/ADT/DepthFirstIterator.h"
66
#include "llvm/ADT/SmallVector.h"
67
#include "llvm/ADT/Statistic.h"
68
#include "llvm/CodeGen/MachineBasicBlock.h"
69
#include "llvm/CodeGen/MachineDominators.h"
70
#include "llvm/CodeGen/MachineFunction.h"
71
#include "llvm/CodeGen/MachineFunctionPass.h"
72
#include "llvm/CodeGen/MachineInstr.h"
73
#include "llvm/CodeGen/MachineInstrBuilder.h"
74
#include "llvm/CodeGen/MachineOperand.h"
75
#include "llvm/CodeGen/MachineRegisterInfo.h"
76
#include "llvm/Pass.h"
77
#include "llvm/Support/Debug.h"
78
#include "llvm/Support/ErrorHandling.h"
79
#include "llvm/Support/raw_ostream.h"
80
#include "llvm/Target/TargetInstrInfo.h"
81
#include "llvm/Target/TargetSubtargetInfo.h"
82
#include <cassert>
83
#include <cstdlib>
84
#include <tuple>
85
86
using namespace llvm;
87
88
#define DEBUG_TYPE "aarch64-condopt"
89
90
STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
91
92
namespace {
93
94
class AArch64ConditionOptimizer : public MachineFunctionPass {
95
  const TargetInstrInfo *TII;
96
  MachineDominatorTree *DomTree;
97
  const MachineRegisterInfo *MRI;
98
99
public:
100
  // Stores immediate, compare instruction opcode and branch condition (in this
101
  // order) of adjusted comparison.
102
  using CmpInfo = std::tuple<int, unsigned, AArch64CC::CondCode>;
103
104
  static char ID;
105
106
13.8k
  AArch64ConditionOptimizer() : MachineFunctionPass(ID) {
107
13.8k
    initializeAArch64ConditionOptimizerPass(*PassRegistry::getPassRegistry());
108
13.8k
  }
109
110
  void getAnalysisUsage(AnalysisUsage &AU) const override;
111
  MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
112
  CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
113
  void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
114
  bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
115
                int ToImm);
116
  bool runOnMachineFunction(MachineFunction &MF) override;
117
118
13.8k
  StringRef getPassName() const override {
119
13.8k
    return "AArch64 Condition Optimizer";
120
13.8k
  }
121
};
122
123
} // end anonymous namespace
124
125
char AArch64ConditionOptimizer::ID = 0;
126
127
90.0k
INITIALIZE_PASS_BEGIN90.0k
(AArch64ConditionOptimizer, "aarch64-condopt",
128
90.0k
                      "AArch64 CondOpt Pass", false, false)
129
90.0k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
130
90.0k
INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
131
                    "AArch64 CondOpt Pass", false, false)
132
133
13.8k
FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
134
13.8k
  return new AArch64ConditionOptimizer();
135
13.8k
}
136
137
13.7k
void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
138
13.7k
  AU.addRequired<MachineDominatorTree>();
139
13.7k
  AU.addPreserved<MachineDominatorTree>();
140
13.7k
  MachineFunctionPass::getAnalysisUsage(AU);
141
13.7k
}
142
143
// Finds compare instruction that corresponds to supported types of branching.
144
// Returns the instruction or nullptr on failures or detecting unsupported
145
// instructions.
146
MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
147
1.85M
    MachineBasicBlock *MBB) {
148
1.85M
  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
149
1.85M
  if (I == MBB->end())
150
56.8k
    return nullptr;
151
1.79M
152
1.79M
  
if (1.79M
I->getOpcode() != AArch64::Bcc1.79M
)
153
1.13M
    return nullptr;
154
666k
155
666k
  // Since we may modify cmp of this MBB, make sure NZCV does not live out.
156
666k
  for (auto SuccBB : MBB->successors())
157
1.33M
    
if (1.33M
SuccBB->isLiveIn(AArch64::NZCV)1.33M
)
158
3
      return nullptr;
159
666k
160
666k
  // Now find the instruction controlling the terminator.
161
717k
  
for (MachineBasicBlock::iterator B = MBB->begin(); 666k
I != B717k
;) {
162
710k
    --I;
163
710k
    assert(!I->isTerminator() && "Spurious terminator");
164
710k
    // Check if there is any use of NZCV between CMP and Bcc.
165
710k
    if (I->readsRegister(AArch64::NZCV))
166
598
      return nullptr;
167
709k
    switch (I->getOpcode()) {
168
709k
    // cmp is an alias for subs with a dead destination register.
169
330k
    case AArch64::SUBSWri:
170
330k
    case AArch64::SUBSXri:
171
330k
    // cmn is an alias for adds with a dead destination register.
172
330k
    case AArch64::ADDSWri:
173
330k
    case AArch64::ADDSXri: {
174
330k
      unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
175
330k
      if (
!I->getOperand(2).isImm()330k
) {
176
0
        DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
177
0
        return nullptr;
178
330k
      } else 
if (330k
I->getOperand(2).getImm() << ShiftAmt >= 0xfff330k
) {
179
4.47k
        DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n');
180
4.47k
        return nullptr;
181
325k
      } else 
if (325k
!MRI->use_empty(I->getOperand(0).getReg())325k
) {
182
2.01k
        DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
183
330k
        return nullptr;
184
330k
      }
185
323k
      return &*I;
186
323k
    }
187
323k
    // Prevent false positive case like:
188
323k
    // cmp      w19, #0
189
323k
    // cinc     w0, w19, gt
190
323k
    // ...
191
323k
    // fcmp     d8, #0.0
192
323k
    // b.gt     .LBB0_5
193
328k
    case AArch64::FCMPDri:
194
328k
    case AArch64::FCMPSri:
195
328k
    case AArch64::FCMPESri:
196
328k
    case AArch64::FCMPEDri:
197
328k
198
328k
    case AArch64::SUBSWrr:
199
328k
    case AArch64::SUBSXrr:
200
328k
    case AArch64::ADDSWrr:
201
328k
    case AArch64::ADDSXrr:
202
328k
    case AArch64::FCMPSrr:
203
328k
    case AArch64::FCMPDrr:
204
328k
    case AArch64::FCMPESrr:
205
328k
    case AArch64::FCMPEDrr:
206
328k
      // Skip comparison instructions without immediate operands.
207
328k
      return nullptr;
208
710k
    }
209
710k
  }
210
7.43k
  
DEBUG7.43k
(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
211
7.43k
  return nullptr;
212
1.85M
}
213
214
// Changes opcode adds <-> subs considering register operand width.
215
0
static int getComplementOpc(int Opc) {
216
0
  switch (Opc) {
217
0
  case AArch64::ADDSWri: return AArch64::SUBSWri;
218
0
  case AArch64::ADDSXri: return AArch64::SUBSXri;
219
0
  case AArch64::SUBSWri: return AArch64::ADDSWri;
220
0
  case AArch64::SUBSXri: return AArch64::ADDSXri;
221
0
  default:
222
0
    llvm_unreachable("Unexpected opcode");
223
0
  }
224
0
}
225
226
// Changes form of comparison inclusive <-> exclusive.
227
3.31k
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
228
3.31k
  switch (Cmp) {
229
118
  case AArch64CC::GT: return AArch64CC::GE;
230
0
  case AArch64CC::GE: return AArch64CC::GT;
231
3.20k
  case AArch64CC::LT: return AArch64CC::LE;
232
0
  case AArch64CC::LE: return AArch64CC::LT;
233
0
  default:
234
0
    llvm_unreachable("Unexpected condition code");
235
0
  }
236
0
}
237
238
// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
239
// operator and condition code.
240
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
241
3.31k
    MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
242
3.31k
  unsigned Opc = CmpMI->getOpcode();
243
3.31k
244
3.31k
  // CMN (compare with negative immediate) is an alias to ADDS (as
245
3.31k
  // "operand - negative" == "operand + positive")
246
3.30k
  bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
247
3.31k
248
3.31k
  int Correction = (Cmp == AArch64CC::GT) ? 
1118
:
-13.20k
;
249
3.31k
  // Negate Correction value for comparison with negative immediate (CMN).
250
3.31k
  if (
Negative3.31k
) {
251
13
    Correction = -Correction;
252
13
  }
253
3.31k
254
3.31k
  const int OldImm = (int)CmpMI->getOperand(2).getImm();
255
3.31k
  const int NewImm = std::abs(OldImm + Correction);
256
3.31k
257
3.31k
  // Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
258
3.31k
  // adjusting compare instruction opcode.
259
3.31k
  if (
OldImm == 0 && 3.31k
((Negative && 39
Correction == 10
) ||
260
3.31k
                      
(!Negative && 39
Correction == -139
))) {
261
0
    Opc = getComplementOpc(Opc);
262
0
  }
263
3.31k
264
3.31k
  return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
265
3.31k
}
266
267
// Applies changes to comparison instruction suggested by adjustCmp().
268
void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
269
3.22k
    const CmpInfo &Info) {
270
3.22k
  int Imm;
271
3.22k
  unsigned Opc;
272
3.22k
  AArch64CC::CondCode Cmp;
273
3.22k
  std::tie(Imm, Opc, Cmp) = Info;
274
3.22k
275
3.22k
  MachineBasicBlock *const MBB = CmpMI->getParent();
276
3.22k
277
3.22k
  // Change immediate in comparison instruction (ADDS or SUBS).
278
3.22k
  BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
279
3.22k
      .add(CmpMI->getOperand(0))
280
3.22k
      .add(CmpMI->getOperand(1))
281
3.22k
      .addImm(Imm)
282
3.22k
      .add(CmpMI->getOperand(3));
283
3.22k
  CmpMI->eraseFromParent();
284
3.22k
285
3.22k
  // The fact that this comparison was picked ensures that it's related to the
286
3.22k
  // first terminator instruction.
287
3.22k
  MachineInstr &BrMI = *MBB->getFirstTerminator();
288
3.22k
289
3.22k
  // Change condition in branch instruction.
290
3.22k
  BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
291
3.22k
      .addImm(Cmp)
292
3.22k
      .add(BrMI.getOperand(1));
293
3.22k
  BrMI.eraseFromParent();
294
3.22k
295
3.22k
  MBB->updateTerminator();
296
3.22k
297
3.22k
  ++NumConditionsAdjusted;
298
3.22k
}
299
300
// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
301
// corresponding to TBB.
302
// Returns true if parsing was successful, otherwise false is returned.
303
143k
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
304
143k
  // A normal br.cond simply has the condition code.
305
143k
  if (
Cond[0].getImm() != -1143k
) {
306
143k
    assert(Cond.size() == 1 && "Unknown Cond array format");
307
143k
    CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
308
143k
    return true;
309
143k
  }
310
0
  return false;
311
0
}
312
313
// Adjusts one cmp instruction to another one if result of adjustment will allow
314
// CSE.  Returns true if compare instruction was changed, otherwise false is
315
// returned.
316
bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
317
  AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm)
318
3.12k
{
319
3.12k
  CmpInfo Info = adjustCmp(CmpMI, Cmp);
320
3.12k
  if (
std::get<0>(Info) == ToImm && 3.12k
std::get<1>(Info) == To->getOpcode()3.12k
) {
321
3.09k
    modifyCmp(CmpMI, Info);
322
3.09k
    return true;
323
3.09k
  }
324
31
  return false;
325
31
}
326
327
456k
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
328
456k
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
329
456k
               << "********** Function: " << MF.getName() << '\n');
330
456k
  if (skipFunction(*MF.getFunction()))
331
1
    return false;
332
456k
333
456k
  TII = MF.getSubtarget().getInstrInfo();
334
456k
  DomTree = &getAnalysis<MachineDominatorTree>();
335
456k
  MRI = &MF.getRegInfo();
336
456k
337
456k
  bool Changed = false;
338
456k
339
456k
  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
340
456k
  // cmp-conversions from the same head block.
341
456k
  // Note that updateDomTree() modifies the children of the DomTree node
342
456k
  // currently being visited. The df_iterator supports that; it doesn't look at
343
456k
  // child_begin() / child_end() until after a node has been visited.
344
3.56M
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
345
3.56M
    MachineBasicBlock *HBB = I->getBlock();
346
3.56M
347
3.56M
    SmallVector<MachineOperand, 4> HeadCond;
348
3.56M
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
349
3.56M
    if (
TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)3.56M
) {
350
672k
      continue;
351
672k
    }
352
2.89M
353
2.89M
    // Equivalence check is to skip loops.
354
2.89M
    
if (2.89M
!TBB || 2.89M
TBB == HBB2.22M
) {
355
822k
      continue;
356
822k
    }
357
2.07M
358
2.07M
    SmallVector<MachineOperand, 4> TrueCond;
359
2.07M
    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
360
2.07M
    if (
TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)2.07M
) {
361
470k
      continue;
362
470k
    }
363
1.60M
364
1.60M
    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
365
1.60M
    if (
!HeadCmpMI1.60M
) {
366
1.35M
      continue;
367
1.35M
    }
368
251k
369
251k
    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
370
251k
    if (
!TrueCmpMI251k
) {
371
180k
      continue;
372
180k
    }
373
71.9k
374
71.9k
    AArch64CC::CondCode HeadCmp;
375
71.9k
    if (
HeadCond.empty() || 71.9k
!parseCond(HeadCond, HeadCmp)71.9k
) {
376
0
      continue;
377
0
    }
378
71.9k
379
71.9k
    AArch64CC::CondCode TrueCmp;
380
71.9k
    if (
TrueCond.empty() || 71.9k
!parseCond(TrueCond, TrueCmp)71.9k
) {
381
0
      continue;
382
0
    }
383
71.9k
384
71.9k
    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
385
71.9k
    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
386
71.9k
387
71.9k
    DEBUG(dbgs() << "Head branch:\n");
388
71.9k
    DEBUG(dbgs() << "\tcondition: "
389
71.9k
          << AArch64CC::getCondCodeName(HeadCmp) << '\n');
390
71.9k
    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
391
71.9k
392
71.9k
    DEBUG(dbgs() << "True branch:\n");
393
71.9k
    DEBUG(dbgs() << "\tcondition: "
394
71.9k
          << AArch64CC::getCondCodeName(TrueCmp) << '\n');
395
71.9k
    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
396
71.9k
397
71.9k
    if (
((HeadCmp == AArch64CC::GT && 71.9k
TrueCmp == AArch64CC::LT4.23k
) ||
398
71.6k
         
(HeadCmp == AArch64CC::LT && 71.6k
TrueCmp == AArch64CC::GT18.4k
)) &&
399
71.9k
        
std::abs(TrueImm - HeadImm) == 2672
) {
400
97
      // This branch transforms machine instructions that correspond to
401
97
      //
402
97
      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
403
97
      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
404
97
      //
405
97
      // into
406
97
      //
407
97
      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
408
97
      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
409
97
410
97
      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
411
97
      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
412
97
      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
413
97
          
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)74
) {
414
68
        modifyCmp(HeadCmpMI, HeadCmpInfo);
415
68
        modifyCmp(TrueCmpMI, TrueCmpInfo);
416
68
        Changed = true;
417
68
      }
418
71.9k
    } else 
if (71.8k
((HeadCmp == AArch64CC::GT && 71.8k
TrueCmp == AArch64CC::GT4.20k
) ||
419
71.5k
                
(HeadCmp == AArch64CC::LT && 71.5k
TrueCmp == AArch64CC::LT18.3k
)) &&
420
71.8k
                
std::abs(TrueImm - HeadImm) == 111.5k
) {
421
3.12k
      // This branch transforms machine instructions that correspond to
422
3.12k
      //
423
3.12k
      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
424
3.12k
      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
425
3.12k
      //
426
3.12k
      // into
427
3.12k
      //
428
3.12k
      // 1) (a <= {NewImm} && ...) || (a >  {NewImm} && ...)
429
3.12k
      // 2) (a <  {NewImm} && ...) || (a >= {NewImm} && ...)
430
3.12k
431
3.12k
      // GT -> GE transformation increases immediate value, so picking the
432
3.12k
      // smaller one; LT -> LE decreases immediate value so invert the choice.
433
3.12k
      bool adjustHeadCond = (HeadImm < TrueImm);
434
3.12k
      if (
HeadCmp == AArch64CC::LT3.12k
) {
435
3.10k
          adjustHeadCond = !adjustHeadCond;
436
3.10k
      }
437
3.12k
438
3.12k
      if (
adjustHeadCond3.12k
) {
439
205
        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
440
3.12k
      } else {
441
2.91k
        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
442
2.91k
      }
443
71.8k
    }
444
3.56M
    // Other transformation cases almost never occur due to generation of < or >
445
3.56M
    // comparisons instead of <= and >=.
446
3.56M
  }
447
456k
448
456k
  return Changed;
449
456k
}