Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
//===----------------------------------------------------------------------===//
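
// As a rough illustration (schematic MIR; register classes, operand flags, and
// exact syntax elided), a copy of EFLAGS such as:
//
//   %flags:gr32 = COPY $eflags
//   ...
//   $eflags = COPY %flags
//   JCC_1 %bb.then, 4 /* COND_E */
//
// is lowered by saving the needed condition into a GPR with SETcc while the
// original flags are still live, then rematerializing it with a TEST:
//
//   %cond:gr8 = SETCCr 4 /* COND_E */, implicit $eflags
//   ...
//   TEST8rr %cond, %cond, implicit-def $eflags
//   JCC_1 %bb.then, 5 /* COND_NE */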

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of add instructions inserted");

namespace {

// Convenient array type for storing registers associated with each condition.
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI;
  const X86Subtarget *Subtarget;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetRegisterClass *PromoteRC;
  MachineDominatorTree *MDT;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  unsigned promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            DebugLoc TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool>
  getCondOrInverseInReg(MachineBasicBlock &TestMBB,
                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                        X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  DebugLoc Loc, unsigned Reg);

  void rewriteArithmetic(MachineBasicBlock &TestMBB,
                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                         MachineInstr &MI, MachineOperand &FlagUse,
                         CondRegArray &CondRegs);
  void rewriteCMov(MachineBasicBlock &TestMBB,
                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
                               MachineBasicBlock::iterator TestPos,
                               DebugLoc TestLoc, MachineInstr &SetBI,
                               MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
                    CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

namespace {
/// An enumeration of the arithmetic instruction mnemonics which have
/// interesting flag semantics.
///
/// We can map instruction opcodes into these mnemonics to make it easy to
/// dispatch with specific functionality.
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
};
} // namespace

static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");

#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
  case X86::MNEMONIC##8##SUFFIX:                                               \
  case X86::MNEMONIC##16##SUFFIX:                                              \
  case X86::MNEMONIC##32##SUFFIX:                                              \
  case X86::MNEMONIC##64##SUFFIX:

#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
  case X86::MNEMONIC##8ri:                                                     \
  case X86::MNEMONIC##16ri8:                                                   \
  case X86::MNEMONIC##32ri8:                                                   \
  case X86::MNEMONIC##64ri8:                                                   \
  case X86::MNEMONIC##16ri:                                                    \
  case X86::MNEMONIC##32ri:                                                    \
  case X86::MNEMONIC##64ri32:                                                  \
  case X86::MNEMONIC##8mi:                                                     \
  case X86::MNEMONIC##16mi8:                                                   \
  case X86::MNEMONIC##32mi8:                                                   \
  case X86::MNEMONIC##64mi8:                                                   \
  case X86::MNEMONIC##16mi:                                                    \
  case X86::MNEMONIC##32mi:                                                    \
  case X86::MNEMONIC##64mi32:                                                  \
  case X86::MNEMONIC##8i8:                                                     \
  case X86::MNEMONIC##16i16:                                                   \
  case X86::MNEMONIC##32i32:                                                   \
  case X86::MNEMONIC##64i32:

    LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

    LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

    LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

    LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;
  }
}
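
// For instance, LLVM_EXPAND_INSTR_SIZES(ADC, rr) above expands to the four
// case labels X86::ADC8rr, X86::ADC16rr, X86::ADC32rr, and X86::ADC64rr, so
// every register-width variant of a mnemonic funnels into a single return.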

static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyways.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());

  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);
  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors.
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge, append a new successor entry.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}
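
// Schematically, when a block ends in
//   JCC_1 %bb.1, ... ; JCC_1 %bb.2, ... ; JMP_1 %bb.3
// and we split at the second jCC, the tail moves into a fresh block:
//   bb.0:   JCC_1 %bb.1, ...          (now falls through to bb.new)
//   bb.new: JCC_1 %bb.2, ... ; JMP_1 %bb.3
// with successor lists, probabilities, and any PHIs in the successors updated
// as done above.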

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.push_back(&MI);

  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in theory
      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
      // enough that it is probably better to change every other part of LLVM
      // to avoid creating them. The issue is that once we have PHIs we won't
      // know which original EFLAGS value we need to capture with our setCCs
      // below. The end result will be computing a complete set of setCCs that
      // we *might* want, computing them in every place where we copy *out* of
      // EFLAGS and then doing SSA formation on all of them to insert necessary
      // PHI nodes and consume those here. Then hoping that somehow we DCE the
      // unnecessary ones. This DCE seems very unlikely to be successful and so
      // we will almost certainly end up with a glut of dead setCC
      // instructions. Until we have a motivating test case and fail to avoid
      // it by changing other parts of LLVM's lowering, we refuse to handle
      // this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten; kill the copy into
      // EFLAGS and, if dead, the copy from it.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We never need to find the actual reaching definition here,
    // but we want to walk up the dominator tree to find the highest point which
    // would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often find
      // a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along a path from the begin to end.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all predecessors that may be reached along paths to
      // the hoist block. A clobber anywhere in any of these blocks prevents the
      // hoist. Note that this even handles loops because we require *no*
      // clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
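
    // For example, if TestMBB has EFLAGS live-in from two predecessors that
    // are both reached from a single flag-setting block, the nearest common
    // dominator computed above is that block, and the hoist succeeds only if
    // no block along any path from it down to TestMBB (nor its own terminator
    // sequence) clobbers EFLAGS.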
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << "  Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << "  Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part of
        // the block or process the instructions in the range that was copied
        // over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
        if (!FlagUse) {
          if (MI.findRegisterDefOperand(X86::EFLAGS)) {
            // If EFLAGS are defined, it's as-if they were killed. We can stop
            // scanning here.
            //
            // NB!!! Many instructions only modify some flags. LLVM currently
            // models this as clobbering all flags, but if that ever changes
            // this will need to be carefully updated to handle that more
            // complex logic.
            FlagsKilled = true;
            break;
          }
          continue;
        }

        LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also be
        // branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until post-RA
        // branch folding or block placement. As a consequence, we get to deal
        // with the simpler formulation of conditional branches followed by tail
        // calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) !=
                       X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
          rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
          // We assume all other instructions that use flags also def them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

          // NB!!! Several arithmetic instructions only *partially* update
          // flags. Theoretically, we could generate MI code sequences that
          // would rely on this fact and observe different flags independently.
          // But currently LLVM models all of these instructions as clobbering
          // all the flags in an undef way. We rely on that to simplify the
          // logic.
          FlagsKilled = true;

          switch (MI.getOpcode()) {
          case X86::SETB_C8r:
          case X86::SETB_C16r:
          case X86::SETB_C32r:
          case X86::SETB_C64r:
            // Use custom lowering for arithmetic that is merely extending the
            // carry flag. We model this as the SETB_C* pseudo instructions.
            rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                                    CondRegs);
            break;

          default:
            // Generically handle remaining uses as arithmetic instructions.
            rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                              CondRegs);
            break;
          }
          break;
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan successors for ones where the flags live-in
      // and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further, we
          // can't have a cycle from the test block back to itself as that would
          // create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() &&
        TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS))
      break;
  }
  return CondRegs;
}
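
// For example, if the scanned range already contains
//   %a:gr8 = SETCCr 4 /* COND_E */, implicit $eflags
// then CondRegs[X86::COND_E] becomes %a and no duplicate SETcc needs to be
// emitted later for that condition.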

unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond) {
  unsigned Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
                      TII->get(X86::SETCCr), Reg).addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}
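
// The emitted instruction has the form (schematic MIR):
//   %reg:gr8 = SETCCr <Cond>, implicit $eflags
// i.e. a one-byte materialization of a single status flag into a GPR.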

std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}

void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          DebugLoc Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
  ++NumTestsInserted;
}
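
// After `TEST8rr %reg, %reg`, ZF is set exactly when %reg is zero, so the
// saved one-bit condition is available to users as COND_NE (or as COND_E when
// only the inverse condition was preserved in a register).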

void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF. Figure out which condition we need
  // to preserve in a register.
  X86::CondCode Cond;

  // The addend that sets CF or OF when added to the saved flag value.
  int Addend;

  switch (getMnemonicFromOpcode(MI.getOpcode())) {
  case FlagArithMnemonic::ADC:
  case FlagArithMnemonic::ADCX:
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend such that adding one to it produces a carry, as there
    // is no higher bit available in the 8-bit addition.
    Addend = 255;
    break;

  case FlagArithMnemonic::ADOX:
    Cond = X86::COND_O; // OF == 1
    // Set up an addend such that adding one to it turns a positive value
    // negative and thus overflows in the signed domain.
    Addend = 127;
    break;
  }

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  MachineBasicBlock &MBB = *MI.getParent();

  // Insert an instruction that will set the flag back to the desired value.
  unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
  ++NumAddsInserted;
  FlagUse.setIsKill(true);
}
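
// Worked example: the saved condition register holds 0 or 1. For the
// CF-consuming mnemonics, `ADD8ri %cond, 255` computes 1 + 255 = 256, which
// carries out of 8 bits (CF = 1), while 0 + 255 = 255 does not (CF = 0). For
// ADOX, 1 + 127 = 128 flips the sign bit of the 8-bit value, setting OF,
// while 0 + 127 = 127 does not.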

void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                           MachineBasicBlock::iterator TestPos,
                                           DebugLoc TestLoc,
                                           MachineInstr &CMovI,
                                           MachineOperand &FlagUse,
                                           CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromCMov(CMovI);
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &MBB = *CMovI.getParent();

  // Insert a direct test of the saved register.
  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);

  // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
  // of the flags afterward.
  CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
      .setImm(Inverted ? X86::COND_E : X86::COND_NE);
  FlagUse.setIsKill(true);
  LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
}
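
// E.g. a CMOV32rr predicated on COND_A whose flags came from the copied
// EFLAGS becomes, roughly:
//   TEST8rr %a, %a, implicit-def $eflags
//   CMOV32rr ..., 5 /* COND_NE */
// (or COND_E when only the inverse of the condition was available in a
// register).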

void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromBranch(JmpI);
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &JmpMBB = *JmpI.getParent();

  // Insert a direct test of the saved register.
  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);

  // Rewrite the jump to use the !ZF flag from the test, and kill its use of
  // flags afterward.
  JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
  JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
  LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
}
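
// Note that only the condition-code operand of the branch changes; the target
// block stays the same. E.g. `JCC_1 %bb.x, 2 /* COND_B */` becomes a TEST8rr
// of the saved CF register followed by `JCC_1 %bb.x, 5 /* COND_NE */`.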

void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
                                           MachineOperand &FlagUse,
                                           MachineInstr &CopyDefI) {
  // Just replace this copy with the original copy def.
  MRI->replaceRegWith(MI.getOperand(0).getReg(),
                      CopyDefI.getOperand(0).getReg());
  MI.eraseFromParent();
}

void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // This routine is only used to handle pseudos for setting a register to zero
  // or all ones based on CF. This is essentially the sign-extended-from-1-bit
  // form of SETB, modeled with the SETB_C* pseudos. They require special
  // handling as they aren't normal SETcc instructions and are lowered to an
  // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
  // they are only provided in reg-defining forms. A complicating factor is that
  // they can define many different register widths.
  assert(SetBI.getOperand(0).isReg() &&
         "Cannot have a non-register defined operand to this variant of SETB!");

  // Little helper to do the common final step of replacing the register def'ed
  // by this SETB instruction with a new register and removing the SETB
  // instruction.
  auto RewriteToReg = [&](unsigned Reg) {
    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
    SetBI.eraseFromParent();
  };

  // Grab the register class used for this particular instruction.
  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());

  MachineBasicBlock &MBB = *SetBI.getParent();
  auto SetPos = SetBI.getIterator();
  auto SetLoc = SetBI.getDebugLoc();

  auto AdjustReg = [&](unsigned Reg) {
    auto &OrigRC = *MRI->getRegClass(Reg);
    if (&OrigRC == &SetBRC)
      return Reg;

    unsigned NewReg;

    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
                       X86::NoSubRegister, X86::sub_32bit};

    // If the original size is smaller than the target *and* is smaller than 4
    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
    // to maximize the chance of being able to CSE that operation and to avoid
    // partial dependency stalls extending to 2-bytes.
    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
          .addReg(Reg);
      if (&SetBRC == &X86::GR32RegClass)
        return NewReg;
      Reg = NewReg;
      OrigRegSize = 4;
    }

    NewReg = MRI->createVirtualRegister(&SetBRC);
    if (OrigRegSize < TargetRegSize) {
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
              NewReg)
          .addImm(0)
          .addReg(Reg)
          .addImm(SubRegIdx[OrigRegSize]);
    } else if (OrigRegSize > TargetRegSize) {
      if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
        // Need to constrain the register class.
        MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
      }

      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
              NewReg)
          .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
    } else {
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
          .addReg(Reg);
    }
    return NewReg;
  };

  unsigned &CondReg = CondRegs[X86::COND_B];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);

  // Adjust the condition to have the desired register width by zero-extending
  // as needed.
  // FIXME: We should use a better API to avoid the local reference and using a
  // different variable here.
  unsigned ExtCondReg = AdjustReg(CondReg);

  // Now we need to turn this into a bitmask. We do this by subtracting it from
  // zero.
  unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
  ZeroReg = AdjustReg(ZeroReg);

  unsigned Sub;
  switch (SetBI.getOpcode()) {
  case X86::SETB_C8r:
    Sub = X86::SUB8rr;
    break;

  case X86::SETB_C16r:
    Sub = X86::SUB16rr;
    break;

  case X86::SETB_C32r:
    Sub = X86::SUB32rr;
    break;

  case X86::SETB_C64r:
    Sub = X86::SUB64rr;
    break;

  default:
    llvm_unreachable("Invalid SETB_C* opcode!");
  }
  unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
      .addReg(ZeroReg)
      .addReg(ExtCondReg);
  return RewriteToReg(ResultReg);
}
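
// Worked example for SETB_C32r with CF saved in %c (0 or 1): the rewrite
// emits roughly
//   %zero:gr32 = MOV32r0
//   %ext:gr32  = MOVZX32rr8 %c
//   %res:gr32  = SUB32rr %zero, %ext
// so %res is 0 when CF was 0 and 0xFFFFFFFF (0 - 1 wrapped) when CF was 1,
// matching the all-zeros/all-ones semantics of the pseudo.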

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
                                            MachineInstr &SetCCI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on duplicates which
  // could have been avoided already being avoided here.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!SetCCI.mayStore()) {
    assert(SetCCI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
    SetCCI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
                     SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(SetCCI.getOperand(i));

  MIB.addReg(CondReg);

  MIB.setMemRefs(SetCCI.memoperands());

  SetCCI.eraseFromParent();
  return;
}