Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===---------------------------------------------------------------------===//
8
//
9
// This pass performs peephole optimizations to clean up ugly code
10
// sequences at the MachineInstruction layer.  It runs at the end of
11
// the SSA phases, following VSX swap removal.  A pass of dead code
12
// elimination follows this one for quick clean-up of any dead
13
// instructions introduced here.  Although we could do this as callbacks
14
// from the generic peephole pass, this would have a couple of bad
15
// effects:  it might remove optimization opportunities for VSX swap
16
// removal, and it would miss cleanups made possible following VSX
17
// swap removal.
18
//
19
//===---------------------------------------------------------------------===//
20
21
#include "PPC.h"
22
#include "PPCInstrBuilder.h"
23
#include "PPCInstrInfo.h"
24
#include "PPCMachineFunctionInfo.h"
25
#include "PPCTargetMachine.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
28
#include "llvm/CodeGen/MachineDominators.h"
29
#include "llvm/CodeGen/MachinePostDominators.h"
30
#include "llvm/CodeGen/MachineFunctionPass.h"
31
#include "llvm/CodeGen/MachineInstrBuilder.h"
32
#include "llvm/CodeGen/MachineRegisterInfo.h"
33
#include "llvm/Support/Debug.h"
34
#include "MCTargetDesc/PPCPredicates.h"
35
36
using namespace llvm;
37
38
#define DEBUG_TYPE "ppc-mi-peepholes"
39
40
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
41
STATISTIC(MultiTOCSaves,
42
          "Number of functions with multiple TOC saves that must be kept");
43
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
44
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
45
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
46
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
47
STATISTIC(NumConvertedToImmediateForm,
48
          "Number of instructions converted to their immediate form");
49
STATISTIC(NumFunctionsEnteredInMIPeephole,
50
          "Number of functions entered in PPC MI Peepholes");
51
STATISTIC(NumFixedPointIterations,
52
          "Number of fixed-point iterations converting reg-reg instructions "
53
          "to reg-imm ones");
54
STATISTIC(NumRotatesCollapsed,
55
          "Number of pairs of rotate left, clear left/right collapsed");
56
STATISTIC(NumEXTSWAndSLDICombined,
57
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
58
59
static cl::opt<bool>
60
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
61
                   cl::desc("Iterate to a fixed point when attempting to "
62
                            "convert reg-reg instructions to reg-imm"));
63
64
static cl::opt<bool>
65
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
66
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));
67
68
static cl::opt<bool>
69
    EnableSExtElimination("ppc-eliminate-signext",
70
                          cl::desc("enable elimination of sign-extensions"),
71
                          cl::init(false), cl::Hidden);
72
73
static cl::opt<bool>
74
    EnableZExtElimination("ppc-eliminate-zeroext",
75
                          cl::desc("enable elimination of zero-extensions"),
76
                          cl::init(false), cl::Hidden);
77
78
namespace {
79
80
struct PPCMIPeephole : public MachineFunctionPass {
81
82
  static char ID;
83
  const PPCInstrInfo *TII;
84
  MachineFunction *MF;
85
  MachineRegisterInfo *MRI;
86
87
1.67k
  PPCMIPeephole() : MachineFunctionPass(ID) {
88
1.67k
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
89
1.67k
  }
90
91
private:
92
  MachineDominatorTree *MDT;
93
  MachinePostDominatorTree *MPDT;
94
  MachineBlockFrequencyInfo *MBFI;
95
  uint64_t EntryFreq;
96
97
  // Initialize class variables.
98
  void initialize(MachineFunction &MFParm);
99
100
  // Perform peepholes.
101
  bool simplifyCode(void);
102
103
  // Perform peepholes.
104
  bool eliminateRedundantCompare(void);
105
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
106
  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
107
  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
108
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
109
                      MachineInstr *MI);
110
111
public:
112
113
1.63k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
114
1.63k
    AU.addRequired<MachineDominatorTree>();
115
1.63k
    AU.addRequired<MachinePostDominatorTree>();
116
1.63k
    AU.addRequired<MachineBlockFrequencyInfo>();
117
1.63k
    AU.addPreserved<MachineDominatorTree>();
118
1.63k
    AU.addPreserved<MachinePostDominatorTree>();
119
1.63k
    AU.addPreserved<MachineBlockFrequencyInfo>();
120
1.63k
    MachineFunctionPass::getAnalysisUsage(AU);
121
1.63k
  }
122
123
  // Main entry point for this pass.
124
10.4k
  bool runOnMachineFunction(MachineFunction &MF) override {
125
10.4k
    if (skipFunction(MF.getFunction()))
126
1
      return false;
127
10.4k
    initialize(MF);
128
10.4k
    return simplifyCode();
129
10.4k
  }
130
};
131
132
// Initialize class variables.
133
10.4k
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
134
10.4k
  MF = &MFParm;
135
10.4k
  MRI = &MF->getRegInfo();
136
10.4k
  MDT = &getAnalysis<MachineDominatorTree>();
137
10.4k
  MPDT = &getAnalysis<MachinePostDominatorTree>();
138
10.4k
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
139
10.4k
  EntryFreq = MBFI->getEntryFreq();
140
10.4k
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
141
10.4k
  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
142
10.4k
  LLVM_DEBUG(MF->dump());
143
10.4k
}
144
145
static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
146
1.87k
                                      MachineRegisterInfo *MRI) {
147
1.87k
  assert(Op && "Invalid Operand!");
148
1.87k
  if (!Op->isReg())
149
0
    return nullptr;
150
1.87k
151
1.87k
  unsigned Reg = Op->getReg();
152
1.87k
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
153
0
    return nullptr;
154
1.87k
155
1.87k
  return MRI->getVRegDef(Reg);
156
1.87k
}
157
158
// This function returns number of known zero bits in output of MI
159
// starting from the most significant bit.
160
static unsigned
161
0
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
162
0
  unsigned Opcode = MI->getOpcode();
163
0
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
164
0
      Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
165
0
    return MI->getOperand(3).getImm();
166
0
167
0
  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
168
0
       MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
169
0
    return MI->getOperand(3).getImm();
170
0
171
0
  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
172
0
       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo  ||
173
0
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
174
0
       MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
175
0
    return 32 + MI->getOperand(3).getImm();
176
0
177
0
  if (Opcode == PPC::ANDIo) {
178
0
    uint16_t Imm = MI->getOperand(2).getImm();
179
0
    return 48 + countLeadingZeros(Imm);
180
0
  }
181
0
182
0
  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
183
0
      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
184
0
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
185
0
    // The result ranges from 0 to 32.
186
0
    return 58;
187
0
188
0
  if (Opcode == PPC::CNTLZD  || Opcode == PPC::CNTLZDo ||
189
0
      Opcode == PPC::CNTTZD  || Opcode == PPC::CNTTZDo)
190
0
    // The result ranges from 0 to 64.
191
0
    return 57;
192
0
193
0
  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
194
0
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
195
0
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
196
0
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
197
0
    return 48;
198
0
199
0
  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
200
0
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
201
0
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
202
0
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
203
0
    return 56;
204
0
205
0
  if (TII->isZeroExtended(*MI))
206
0
    return 32;
207
0
208
0
  return 0;
209
0
}
210
211
// This function maintains a map for the pairs <TOC Save Instr, Keep>
212
// Each time a new TOC save is encountered, it checks if any of the existing
213
// ones are dominated by the new one. If so, it marks the existing one as
214
// redundant by setting it's entry in the map as false. It then adds the new
215
// instruction to the map with either true or false depending on if any
216
// existing instructions dominated the new one.
217
void PPCMIPeephole::UpdateTOCSaves(
218
29
  std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
219
29
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
220
29
  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
221
29
         "TOC-save removal only supported on ELFv2");
222
29
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
223
29
224
29
  MachineBasicBlock *Entry = &MF->front();
225
29
  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
226
29
227
29
  // If the block in which the TOC save resides is in a block that
228
29
  // post-dominates Entry, or a block that is hotter than entry (keep in mind
229
29
  // that early MachineLICM has already run so the TOC save won't be hoisted)
230
29
  // we can just do the save in the prologue.
231
29
  if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
232
13
    FI->setMustSaveTOC(true);
233
29
234
29
  // If we are saving the TOC in the prologue, all the TOC saves can be removed
235
29
  // from the code.
236
29
  if (FI->mustSaveTOC()) {
237
17
    for (auto &TOCSave : TOCSaves)
238
14
      TOCSave.second = false;
239
17
    // Add new instruction to map.
240
17
    TOCSaves[MI] = false;
241
17
    return;
242
17
  }
243
12
244
12
  bool Keep = true;
245
14
  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); 
It++2
) {
246
2
    MachineInstr *CurrInst = It->first;
247
2
    // If new instruction dominates an existing one, mark existing one as
248
2
    // redundant.
249
2
    if (It->second && MDT->dominates(MI, CurrInst))
250
0
      It->second = false;
251
2
    // Check if the new instruction is redundant.
252
2
    if (MDT->dominates(CurrInst, MI)) {
253
0
      Keep = false;
254
0
      break;
255
0
    }
256
2
  }
257
12
  // Add new instruction to map.
258
12
  TOCSaves[MI] = Keep;
259
12
}
260
261
// Perform peephole optimizations.
262
10.4k
bool PPCMIPeephole::simplifyCode(void) {
263
10.4k
  bool Simplified = false;
264
10.4k
  MachineInstr* ToErase = nullptr;
265
10.4k
  std::map<MachineInstr *, bool> TOCSaves;
266
10.4k
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
267
10.4k
  NumFunctionsEnteredInMIPeephole++;
268
10.4k
  if (ConvertRegReg) {
269
10.4k
    // Fixed-point conversion of reg/reg instructions fed by load-immediate
270
10.4k
    // into reg/imm instructions. FIXME: This is expensive, control it with
271
10.4k
    // an option.
272
10.4k
    bool SomethingChanged = false;
273
10.5k
    do {
274
10.5k
      NumFixedPointIterations++;
275
10.5k
      SomethingChanged = false;
276
15.8k
      for (MachineBasicBlock &MBB : *MF) {
277
134k
        for (MachineInstr &MI : MBB) {
278
134k
          if (MI.isDebugInstr())
279
15
            continue;
280
134k
281
134k
          if (TII->convertToImmediateForm(MI)) {
282
169
            // We don't erase anything in case the def has other uses. Let DCE
283
169
            // remove it if it can be removed.
284
169
            LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
285
169
            LLVM_DEBUG(MI.dump());
286
169
            NumConvertedToImmediateForm++;
287
169
            SomethingChanged = true;
288
169
            Simplified = true;
289
169
            continue;
290
169
          }
291
134k
        }
292
15.8k
      }
293
10.5k
    } while (SomethingChanged && 
FixedPointRegToImm121
);
294
10.4k
  }
295
10.4k
296
15.7k
  for (MachineBasicBlock &MBB : *MF) {
297
132k
    for (MachineInstr &MI : MBB) {
298
132k
299
132k
      // If the previous instruction was marked for elimination,
300
132k
      // remove it now.
301
132k
      if (ToErase) {
302
230
        ToErase->eraseFromParent();
303
230
        ToErase = nullptr;
304
230
      }
305
132k
306
132k
      // Ignore debug instructions.
307
132k
      if (MI.isDebugInstr())
308
15
        continue;
309
132k
310
132k
      // Per-opcode peepholes.
311
132k
      switch (MI.getOpcode()) {
312
132k
313
132k
      default:
314
125k
        break;
315
132k
316
132k
      case PPC::STD: {
317
1.20k
        MachineFrameInfo &MFI = MF->getFrameInfo();
318
1.20k
        if (MFI.hasVarSizedObjects() ||
319
1.20k
            
!MF->getSubtarget<PPCSubtarget>().isELFv2ABI()1.20k
)
320
852
          break;
321
355
        // When encountering a TOC save instruction, call UpdateTOCSaves
322
355
        // to add it to the TOCSaves map and mark any existing TOC saves
323
355
        // it dominates as redundant.
324
355
        if (TII->isTOCSaveMI(MI))
325
29
          UpdateTOCSaves(TOCSaves, &MI);
326
355
        break;
327
355
      }
328
2.57k
      case PPC::XXPERMDI: {
329
2.57k
        // Perform simplifications of 2x64 vector swaps and splats.
330
2.57k
        // A swap is identified by an immediate value of 2, and a splat
331
2.57k
        // is identified by an immediate value of 0 or 3.
332
2.57k
        int Immed = MI.getOperand(3).getImm();
333
2.57k
334
2.57k
        if (Immed != 1) {
335
2.48k
336
2.48k
          // For each of these simplifications, we need the two source
337
2.48k
          // regs to match.  Unfortunately, MachineCSE ignores COPY and
338
2.48k
          // SUBREG_TO_REG, so for example we can see
339
2.48k
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
340
2.48k
          // We have to look through chains of COPY and SUBREG_TO_REG
341
2.48k
          // to find the real source values for comparison.
342
2.48k
          unsigned TrueReg1 =
343
2.48k
            TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
344
2.48k
          unsigned TrueReg2 =
345
2.48k
            TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
346
2.48k
347
2.48k
          if (TrueReg1 == TrueReg2
348
2.48k
              && 
TargetRegisterInfo::isVirtualRegister(TrueReg1)1.96k
) {
349
1.65k
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
350
1.65k
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 
00
;
351
1.65k
352
1.65k
            // If this is a splat fed by a splatting load, the splat is
353
1.65k
            // redundant. Replace with a copy. This doesn't happen directly due
354
1.65k
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
355
1.65k
            // a load of a double to a vector of 64-bit integers.
356
1.65k
            auto isConversionOfLoadAndSplat = [=]() -> bool {
357
109
              if (DefOpc != PPC::XVCVDPSXDS && 
DefOpc != PPC::XVCVDPUXDS105
)
358
101
                return false;
359
8
              unsigned DefReg =
360
8
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
361
8
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
362
8
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
363
8
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
364
8
                  return true;
365
0
              }
366
0
              return false;
367
0
            };
368
1.65k
            if (DefMI && (Immed == 0 || 
Immed == 31.58k
)) {
369
109
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
370
8
                LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
371
8
                                     "to load-and-splat/copy: ");
372
8
                LLVM_DEBUG(MI.dump());
373
8
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
374
8
                        MI.getOperand(0).getReg())
375
8
                    .add(MI.getOperand(1));
376
8
                ToErase = &MI;
377
8
                Simplified = true;
378
8
              }
379
109
            }
380
1.65k
381
1.65k
            // If this is a splat or a swap fed by another splat, we
382
1.65k
            // can replace it with a copy.
383
1.65k
            if (DefOpc == PPC::XXPERMDI) {
384
151
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
385
151
              unsigned FeedReg1 =
386
151
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
387
151
              unsigned FeedReg2 =
388
151
                TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
389
151
390
151
              if ((FeedImmed == 0 || 
FeedImmed == 3119
) &&
FeedReg1 == FeedReg232
) {
391
29
                LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
392
29
                                     "to splat/copy: ");
393
29
                LLVM_DEBUG(MI.dump());
394
29
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
395
29
                        MI.getOperand(0).getReg())
396
29
                    .add(MI.getOperand(1));
397
29
                ToErase = &MI;
398
29
                Simplified = true;
399
29
              }
400
122
401
122
              // If this is a splat fed by a swap, we can simplify modify
402
122
              // the splat to splat the other value from the swap's input
403
122
              // parameter.
404
122
              else if ((Immed == 0 || 
Immed == 3118
)
405
122
                       && 
FeedImmed == 217
&&
FeedReg1 == FeedReg217
) {
406
17
                LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
407
17
                LLVM_DEBUG(MI.dump());
408
17
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
409
17
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
410
17
                MI.getOperand(3).setImm(3 - Immed);
411
17
                Simplified = true;
412
17
              }
413
105
414
105
              // If this is a swap fed by a swap, we can replace it
415
105
              // with a copy from the first swap's input.
416
105
              else if (Immed == 2 && FeedImmed == 2 && 
FeedReg1 == FeedReg2102
) {
417
102
                LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
418
102
                LLVM_DEBUG(MI.dump());
419
102
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
420
102
                        MI.getOperand(0).getReg())
421
102
                    .add(DefMI->getOperand(1));
422
102
                ToErase = &MI;
423
102
                Simplified = true;
424
102
              }
425
1.50k
            } else if ((Immed == 0 || 
Immed == 31.43k
) &&
DefOpc == PPC::XXPERMDIs85
&&
426
1.50k
                       
(28
DefMI->getOperand(2).getImm() == 028
||
427
28
                        
DefMI->getOperand(2).getImm() == 34
)) {
428
24
              // Splat fed by another splat - switch the output of the first
429
24
              // and remove the second.
430
24
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
431
24
              ToErase = &MI;
432
24
              Simplified = true;
433
24
              LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
434
24
              LLVM_DEBUG(MI.dump());
435
24
            }
436
1.65k
          }
437
2.48k
        }
438
2.57k
        break;
439
355
      }
440
355
      case PPC::VSPLTB:
441
222
      case PPC::VSPLTH:
442
222
      case PPC::XXSPLTW: {
443
222
        unsigned MyOpcode = MI.getOpcode();
444
222
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1184
:
238
;
445
222
        unsigned TrueReg =
446
222
          TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
447
222
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
448
60
          break;
449
162
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
450
162
        if (!DefMI)
451
0
          break;
452
162
        unsigned DefOpcode = DefMI->getOpcode();
453
162
        auto isConvertOfSplat = [=]() -> bool {
454
94
          if (DefOpcode != PPC::XVCVSPSXWS && 
DefOpcode != PPC::XVCVSPUXWS92
)
455
90
            return false;
456
4
          unsigned ConvReg = DefMI->getOperand(1).getReg();
457
4
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
458
0
            return false;
459
4
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
460
4
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
461
4
            
Splt->getOpcode() == PPC::XXSPLTW0
);
462
4
        };
463
162
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
464
162
          
(150
MyOpcode == PPC::VSPLTB150
&&
DefOpcode == PPC::VSPLTBs24
) ||
465
162
          
(134
MyOpcode == PPC::VSPLTH134
&&
DefOpcode == PPC::VSPLTHs14
) ||
466
162
          
(126
MyOpcode == PPC::XXSPLTW126
&&
DefOpcode == PPC::XXSPLTWs112
) ||
467
162
          
(110
MyOpcode == PPC::XXSPLTW110
&&
DefOpcode == PPC::LXVWSX96
) ||
468
162
          
(110
MyOpcode == PPC::XXSPLTW110
&&
DefOpcode == PPC::MTVSRWS96
)||
469
162
          
(108
MyOpcode == PPC::XXSPLTW108
&&
isConvertOfSplat()94
);
470
162
        // If the instruction[s] that feed this splat have already splat
471
162
        // the value, this splat is redundant.
472
162
        if (AlreadySplat) {
473
58
          LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
474
58
          LLVM_DEBUG(MI.dump());
475
58
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
476
58
                  MI.getOperand(0).getReg())
477
58
              .add(MI.getOperand(OpNo));
478
58
          ToErase = &MI;
479
58
          Simplified = true;
480
58
        }
481
162
        // Splat fed by a shift. Usually when we align value to splat into
482
162
        // vector element zero.
483
162
        if (DefOpcode == PPC::XXSLDWI) {
484
0
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
485
0
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
486
0
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
487
0
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
488
0
          unsigned SplatImm = MI.getOperand(2).getImm();
489
0
          if (ShiftOp1 == ShiftOp2) {
490
0
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
491
0
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
492
0
              LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
493
0
              LLVM_DEBUG(DefMI->dump());
494
0
              ToErase = DefMI;
495
0
            }
496
0
            Simplified = true;
497
0
            LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
498
0
                              << " to " << NewElem << " in instruction: ");
499
0
            LLVM_DEBUG(MI.dump());
500
0
            MI.getOperand(1).setReg(ShiftOp1);
501
0
            MI.getOperand(2).setImm(NewElem);
502
0
          }
503
0
        }
504
162
        break;
505
162
      }
506
162
      case PPC::XVCVDPSP: {
507
40
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
508
40
        unsigned TrueReg =
509
40
          TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
510
40
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
511
2
          break;
512
38
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
513
38
514
38
        // This can occur when building a vector of single precision or integer
515
38
        // values.
516
38
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
517
38
          unsigned DefsReg1 =
518
38
            TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
519
38
          unsigned DefsReg2 =
520
38
            TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
521
38
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
522
38
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
523
0
            break;
524
38
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
525
38
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
526
38
527
38
          if (!P1 || !P2)
528
0
            break;
529
38
530
38
          // Remove the passed FRSP instruction if it only feeds this MI and
531
38
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
532
76
          
auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) 38
{
533
76
            if (RoundInstr->getOpcode() == PPC::FRSP &&
534
76
                
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())28
) {
535
28
              Simplified = true;
536
28
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
537
28
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
538
28
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
539
140
              for (int i = 0, e = Use.getNumOperands(); i < e; 
++i112
)
540
112
                if (Use.getOperand(i).isReg() &&
541
112
                    
Use.getOperand(i).getReg() == FRSPDefines56
)
542
28
                  Use.getOperand(i).setReg(ConvReg1);
543
28
              LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
544
28
              LLVM_DEBUG(RoundInstr->dump());
545
28
              LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
546
28
              LLVM_DEBUG(MI.dump());
547
28
              LLVM_DEBUG(dbgs() << "Through instruction:\n");
548
28
              LLVM_DEBUG(DefMI->dump());
549
28
              RoundInstr->eraseFromParent();
550
28
            }
551
76
          };
552
38
553
38
          // If the input to XVCVDPSP is a vector that was built (even
554
38
          // partially) out of FRSP's, the FRSP(s) can safely be removed
555
38
          // since this instruction performs the same operation.
556
38
          if (P1 != P2) {
557
38
            removeFRSPIfPossible(P1);
558
38
            removeFRSPIfPossible(P2);
559
38
            break;
560
38
          }
561
0
          removeFRSPIfPossible(P1);
562
0
        }
563
38
        
break0
;
564
38
      }
565
75
      case PPC::EXTSH:
566
75
      case PPC::EXTSH8:
567
75
      case PPC::EXTSH8_32_64: {
568
75
        if (!EnableSExtElimination) break;
569
0
        unsigned NarrowReg = MI.getOperand(1).getReg();
570
0
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
571
0
          break;
572
0
573
0
        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
574
0
        // If we've used a zero-extending load that we will sign-extend,
575
0
        // just do a sign-extending load.
576
0
        if (SrcMI->getOpcode() == PPC::LHZ ||
577
0
            SrcMI->getOpcode() == PPC::LHZX) {
578
0
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
579
0
            break;
580
0
          auto is64Bit = [] (unsigned Opcode) {
581
0
            return Opcode == PPC::EXTSH8;
582
0
          };
583
0
          auto isXForm = [] (unsigned Opcode) {
584
0
            return Opcode == PPC::LHZX;
585
0
          };
586
0
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
587
0
            if (is64Bit)
588
0
              if (isXForm) return PPC::LHAX8;
589
0
              else         return PPC::LHA8;
590
0
            else
591
0
              if (isXForm) return PPC::LHAX;
592
0
              else         return PPC::LHA;
593
0
          };
594
0
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
595
0
                                       isXForm(SrcMI->getOpcode()));
596
0
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
597
0
          LLVM_DEBUG(SrcMI->dump());
598
0
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
599
0
          LLVM_DEBUG(MI.dump());
600
0
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
601
0
          SrcMI->setDesc(TII->get(Opc));
602
0
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
603
0
          ToErase = &MI;
604
0
          Simplified = true;
605
0
          NumEliminatedSExt++;
606
0
        }
607
0
        break;
608
0
      }
609
443
      case PPC::EXTSW:
610
443
      case PPC::EXTSW_32:
611
443
      case PPC::EXTSW_32_64: {
612
443
        if (!EnableSExtElimination) break;
613
0
        unsigned NarrowReg = MI.getOperand(1).getReg();
614
0
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
615
0
          break;
616
0
617
0
        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
618
0
        // If we've used a zero-extending load that we will sign-extend,
619
0
        // just do a sign-extending load.
620
0
        if (SrcMI->getOpcode() == PPC::LWZ ||
621
0
            SrcMI->getOpcode() == PPC::LWZX) {
622
0
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
623
0
            break;
624
0
          auto is64Bit = [] (unsigned Opcode) {
625
0
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
626
0
          };
627
0
          auto isXForm = [] (unsigned Opcode) {
628
0
            return Opcode == PPC::LWZX;
629
0
          };
630
0
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
631
0
            if (is64Bit)
632
0
              if (isXForm) return PPC::LWAX;
633
0
              else         return PPC::LWA;
634
0
            else
635
0
              if (isXForm) return PPC::LWAX_32;
636
0
              else         return PPC::LWA_32;
637
0
          };
638
0
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
639
0
                                       isXForm(SrcMI->getOpcode()));
640
0
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
641
0
          LLVM_DEBUG(SrcMI->dump());
642
0
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
643
0
          LLVM_DEBUG(MI.dump());
644
0
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
645
0
          SrcMI->setDesc(TII->get(Opc));
646
0
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
647
0
          ToErase = &MI;
648
0
          Simplified = true;
649
0
          NumEliminatedSExt++;
650
0
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
651
0
                   TII->isSignExtended(*SrcMI)) {
652
0
          // We can eliminate EXTSW if the input is known to be already
653
0
          // sign-extended.
654
0
          LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
655
0
          unsigned TmpReg =
656
0
            MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
657
0
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
658
0
                  TmpReg);
659
0
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
660
0
                  MI.getOperand(0).getReg())
661
0
              .addReg(TmpReg)
662
0
              .addReg(NarrowReg)
663
0
              .addImm(PPC::sub_32);
664
0
          ToErase = &MI;
665
0
          Simplified = true;
666
0
          NumEliminatedSExt++;
667
0
        }
668
0
        break;
669
0
      }
670
1.05k
      case PPC::RLDICL: {
671
1.05k
        // We can eliminate RLDICL (e.g. for zero-extension)
672
1.05k
        // if all bits to clear are already zero in the input.
673
1.05k
        // This code assume following code sequence for zero-extension.
674
1.05k
        //   %6 = COPY %5:sub_32; (optional)
675
1.05k
        //   %8 = IMPLICIT_DEF;
676
1.05k
        //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
677
1.05k
        if (!EnableZExtElimination) break;
678
0
679
0
        if (MI.getOperand(2).getImm() != 0)
680
0
          break;
681
0
682
0
        unsigned SrcReg = MI.getOperand(1).getReg();
683
0
        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
684
0
          break;
685
0
686
0
        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
687
0
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
688
0
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
689
0
          break;
690
0
691
0
        MachineInstr *ImpDefMI, *SubRegMI;
692
0
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
693
0
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
694
0
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
695
0
696
0
        SrcMI = SubRegMI;
697
0
        if (SubRegMI->getOpcode() == PPC::COPY) {
698
0
          unsigned CopyReg = SubRegMI->getOperand(1).getReg();
699
0
          if (TargetRegisterInfo::isVirtualRegister(CopyReg))
700
0
            SrcMI = MRI->getVRegDef(CopyReg);
701
0
        }
702
0
703
0
        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
704
0
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
705
0
          LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
706
0
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
707
0
                  MI.getOperand(0).getReg())
708
0
              .addReg(SrcReg);
709
0
          ToErase = &MI;
710
0
          Simplified = true;
711
0
          NumEliminatedZExt++;
712
0
        }
713
0
        break;
714
0
      }
715
0
716
0
      // TODO: Any instruction that has an immediate form fed only by a PHI
717
0
      // whose operands are all load immediate can be folded away. We currently
718
0
      // do this for ADD instructions, but should expand it to arithmetic and
719
0
      // binary instructions with immediate forms in the future.
720
851
      case PPC::ADD4:
721
851
      case PPC::ADD8: {
722
1.70k
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
723
1.70k
          assert(PhiOp && "Invalid Operand!");
724
1.70k
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
725
1.70k
726
1.70k
          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
727
1.70k
                 
MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg())93
;
728
1.70k
        };
729
851
730
851
        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
731
851
                                            MachineOperand *PhiOp) {
732
52
          assert(PhiOp && "Invalid Operand!");
733
52
          assert(DominatorOp && "Invalid Operand!");
734
52
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
735
52
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
736
52
737
52
          // Note: the vregs only show up at odd indices position of PHI Node,
738
52
          // the even indices position save the BB info.
739
62
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); 
i += 210
) {
740
58
            MachineInstr *LiMI =
741
58
                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
742
58
            if (!LiMI ||
743
58
                (LiMI->getOpcode() != PPC::LI && 
LiMI->getOpcode() != PPC::LI827
)
744
58
                || 
!MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg())41
||
745
58
                
!MDT->dominates(DefDomMI, LiMI)38
)
746
48
              return false;
747
58
          }
748
52
749
52
          
return true4
;
750
52
        };
751
851
752
851
        MachineOperand Op1 = MI.getOperand(1);
753
851
        MachineOperand Op2 = MI.getOperand(2);
754
851
        if (isSingleUsePHI(&Op2) && 
dominatesAllSingleUseLIs(&Op1, &Op2)39
)
755
0
          std::swap(Op1, Op2);
756
851
        else if (!isSingleUsePHI(&Op1) || 
!dominatesAllSingleUseLIs(&Op2, &Op1)13
)
757
847
          break; // We don't have an ADD fed by LI's that can be transformed
758
4
759
4
        // Now we know that Op1 is the PHI node and Op2 is the dominator
760
4
        unsigned DominatorReg = Op2.getReg();
761
4
762
4
        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
763
4
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
764
4
                                             : 
&PPC::GPRC_and_GPRC_NOR0RegClass0
;
765
4
        MRI->setRegClass(DominatorReg, TRC);
766
4
767
4
        // replace LIs with ADDIs
768
4
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
769
14
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); 
i += 210
) {
770
10
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
771
10
          LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
772
10
          LLVM_DEBUG(LiMI->dump());
773
10
774
10
          // There could be repeated registers in the PHI, e.g: %1 =
775
10
          // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
776
10
          // already replaced the def instruction, skip.
777
10
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
778
0
            continue;
779
10
780
10
          assert((LiMI->getOpcode() == PPC::LI ||
781
10
                  LiMI->getOpcode() == PPC::LI8) &&
782
10
                 "Invalid Opcode!");
783
10
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
784
10
          LiMI->RemoveOperand(1);                    // remove the imm of LI
785
10
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? 
PPC::ADDI0
786
10
                                                              : PPC::ADDI8));
787
10
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
788
10
              .addReg(DominatorReg)
789
10
              .addImm(LiImm); // restore the imm of LI
790
10
          LLVM_DEBUG(LiMI->dump());
791
10
        }
792
4
793
4
        // Replace ADD with COPY
794
4
        LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
795
4
        LLVM_DEBUG(MI.dump());
796
4
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
797
4
                MI.getOperand(0).getReg())
798
4
            .add(Op1);
799
4
        ToErase = &MI;
800
4
        Simplified = true;
801
4
        NumOptADDLIs++;
802
4
        break;
803
4
      }
804
799
      case PPC::RLDICR: {
805
799
        Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
806
799
                      
combineSEXTAndSHL(MI, ToErase)789
;
807
799
        break;
808
4
      }
809
132k
      }
810
132k
    }
811
15.7k
812
15.7k
    // If the last instruction was marked for elimination,
813
15.7k
    // remove it now.
814
15.7k
    if (ToErase) {
815
0
      ToErase->eraseFromParent();
816
0
      ToErase = nullptr;
817
0
    }
818
15.7k
  }
819
10.4k
820
10.4k
  // Eliminate all the TOC save instructions which are redundant.
821
10.4k
  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
822
10.4k
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
823
10.4k
  if (FI->mustSaveTOC())
824
11
    NumTOCSavesInPrologue++;
825
10.4k
826
10.4k
  // We try to eliminate redundant compare instruction.
827
10.4k
  Simplified |= eliminateRedundantCompare();
828
10.4k
829
10.4k
  return Simplified;
830
10.4k
}
831
832
// helper functions for eliminateRedundantCompare
833
126
static bool isEqOrNe(MachineInstr *BI) {
834
126
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
835
126
  unsigned PredCond = PPC::getPredicateCondition(Pred);
836
126
  return (PredCond == PPC::PRED_EQ || 
PredCond == PPC::PRED_NE94
);
837
126
}
838
839
280
static bool isSupportedCmpOp(unsigned opCode) {
840
280
  return (opCode == PPC::CMPLD  || 
opCode == PPC::CMPD257
||
841
280
          
opCode == PPC::CMPLW251
||
opCode == PPC::CMPW199
||
842
280
          
opCode == PPC::CMPLDI192
||
opCode == PPC::CMPDI134
||
843
280
          
opCode == PPC::CMPLWI124
||
opCode == PPC::CMPWI27
);
844
280
}
845
846
272
static bool is64bitCmpOp(unsigned opCode) {
847
272
  return (opCode == PPC::CMPLD  || 
opCode == PPC::CMPD249
||
848
272
          
opCode == PPC::CMPLDI243
||
opCode == PPC::CMPDI185
);
849
272
}
850
851
112
static bool isSignedCmpOp(unsigned opCode) {
852
112
  return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
853
112
          opCode == PPC::CMPDI || 
opCode == PPC::CMPWI100
);
854
112
}
855
856
42
static unsigned getSignedCmpOpCode(unsigned opCode) {
857
42
  if (opCode == PPC::CMPLD)  
return PPC::CMPD9
;
858
33
  if (opCode == PPC::CMPLW)  
return PPC::CMPW7
;
859
26
  if (opCode == PPC::CMPLDI) 
return PPC::CMPDI9
;
860
17
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
861
0
  return opCode;
862
0
}
863
864
// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
865
// (LT x) to (LE x-1)
866
56
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
867
56
  uint64_t Imm = CMPI->getOperand(2).getImm();
868
56
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
869
56
  if ((!SignedCmp && 
Imm == 040
) ||
(52
SignedCmp52
&&
Imm == 0x800016
))
870
4
    return 0;
871
52
872
52
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
873
52
  unsigned PredCond = PPC::getPredicateCondition(Pred);
874
52
  unsigned PredHint = PPC::getPredicateHint(Pred);
875
52
  if (PredCond == PPC::PRED_GE)
876
0
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
877
52
  if (PredCond == PPC::PRED_LT)
878
10
    return PPC::getPredicate(PPC::PRED_LE, PredHint);
879
42
880
42
  return 0;
881
42
}
882
883
// We can increment immediate x in (GT x) by changing it to (GE x+1) or
884
// (LE x) to (LT x+1)
885
56
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
886
56
  uint64_t Imm = CMPI->getOperand(2).getImm();
887
56
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
888
56
  if ((!SignedCmp && 
Imm == 0xFFFF40
) || (SignedCmp &&
Imm == 0x7FFF16
))
889
0
    return 0;
890
56
891
56
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
892
56
  unsigned PredCond = PPC::getPredicateCondition(Pred);
893
56
  unsigned PredHint = PPC::getPredicateHint(Pred);
894
56
  if (PredCond == PPC::PRED_GT)
895
18
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
896
38
  if (PredCond == PPC::PRED_LE)
897
0
    return PPC::getPredicate(PPC::PRED_LT, PredHint);
898
38
899
38
  return 0;
900
38
}
901
902
// This takes a Phi node and returns a register value for the specified BB.
903
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
904
5
                                       MachineBasicBlock *MBB) {
905
7
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; 
I += 22
) {
906
7
    MachineOperand &MO = Phi->getOperand(I);
907
7
    if (MO.getMBB() == MBB)
908
5
      return Phi->getOperand(I-1).getReg();
909
7
  }
910
5
  
llvm_unreachable0
("invalid src basic block for this Phi node\n");
911
5
  
return 00
;
912
5
}
913
914
// This function tracks the source of the register through register copy.
915
// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
916
// assuming that the control comes from BB1 into BB2.
917
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
918
300
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
919
300
  unsigned SrcReg = Reg;
920
303
  while (1) {
921
303
    unsigned NextReg = SrcReg;
922
303
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
923
303
    if (BB1 && 
Inst->getOpcode() == PPC::PHI150
&&
Inst->getParent() == BB25
) {
924
3
      NextReg = getIncomingRegForBlock(Inst, BB1);
925
3
      // We track through PHI only once to avoid infinite loop.
926
3
      BB1 = nullptr;
927
3
    }
928
300
    else if (Inst->isFullCopy())
929
70
      NextReg = Inst->getOperand(1).getReg();
930
303
    if (NextReg == SrcReg || 
!TargetRegisterInfo::isVirtualRegister(NextReg)73
)
931
300
      break;
932
3
    SrcReg = NextReg;
933
3
  }
934
300
  return SrcReg;
935
300
}
936
937
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
938
                                          MachineBasicBlock *&PredMBB,
939
                                          MachineBasicBlock *&MBBtoMoveCmp,
940
15.7k
                                          MachineRegisterInfo *MRI) {
941
15.7k
942
16.3k
  auto isEligibleBB = [&](MachineBasicBlock &BB) {
943
16.3k
    auto BII = BB.getFirstInstrTerminator();
944
16.3k
    // We optimize BBs ending with a conditional branch.
945
16.3k
    // We check only for BCC here, not BCCLR, because BCCLR
946
16.3k
    // will be formed only later in the pipeline.
947
16.3k
    if (BB.succ_size() == 2 &&
948
16.3k
        
BII != BB.instr_end()3.16k
&&
949
16.3k
        
(*BII).getOpcode() == PPC::BCC3.16k
&&
950
16.3k
        
(*BII).getOperand(1).isReg()2.13k
) {
951
2.13k
      // We optimize only if the condition code is used only by one BCC.
952
2.13k
      unsigned CndReg = (*BII).getOperand(1).getReg();
953
2.13k
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
954
2.13k
          
!MRI->hasOneNonDBGUse(CndReg)1.23k
)
955
992
        return false;
956
1.14k
957
1.14k
      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
958
1.14k
      // We assume compare and branch are in the same BB for ease of analysis.
959
1.14k
      if (CMPI->getParent() != &BB)
960
14
        return false;
961
1.13k
962
1.13k
      // We skip this BB if a physical register is used in comparison.
963
1.13k
      for (MachineOperand &MO : CMPI->operands())
964
3.29k
        if (MO.isReg() && 
!TargetRegisterInfo::isVirtualRegister(MO.getReg())2.55k
)
965
110
          return false;
966
1.13k
967
1.13k
      
return true1.02k
;
968
14.1k
    }
969
14.1k
    return false;
970
14.1k
  };
971
15.7k
972
15.7k
  // If this BB has more than one successor, we can create a new BB and
973
15.7k
  // move the compare instruction in the new BB.
974
15.7k
  // So far, we do not move compare instruction to a BB having multiple
975
15.7k
  // successors to avoid potentially increasing code size.
976
15.7k
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
977
172
    return BB.succ_size() == 1;
978
172
  };
979
15.7k
980
15.7k
  if (!isEligibleBB(MBB))
981
14.9k
    return false;
982
717
983
717
  unsigned NumPredBBs = MBB.pred_size();
984
717
  if (NumPredBBs == 1) {
985
221
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
986
221
    if (isEligibleBB(*TmpMBB)) {
987
132
      PredMBB = TmpMBB;
988
132
      MBBtoMoveCmp = nullptr;
989
132
      return true;
990
132
    }
991
496
  }
992
496
  else if (NumPredBBs == 2) {
993
210
    // We check for partially redundant case.
994
210
    // So far, we support cases with only two predecessors
995
210
    // to avoid increasing the number of instructions.
996
210
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
997
210
    MachineBasicBlock *Pred1MBB = *PI;
998
210
    MachineBasicBlock *Pred2MBB = *(PI+1);
999
210
1000
210
    if (isEligibleBB(*Pred1MBB) && 
isEligibleForMoveCmp(*Pred2MBB)31
) {
1001
21
      // We assume Pred1MBB is the BB containing the compare to be merged and
1002
21
      // Pred2MBB is the BB to which we will append a compare instruction.
1003
21
      // Hence we can proceed as is.
1004
21
    }
1005
189
    else if (isEligibleBB(*Pred2MBB) && 
isEligibleForMoveCmp(*Pred1MBB)141
) {
1006
131
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1007
131
      std::swap(Pred1MBB, Pred2MBB);
1008
131
    }
1009
58
    else return false;
1010
152
1011
152
    // Here, Pred2MBB is the BB to which we need to append a compare inst.
1012
152
    // We cannot move the compare instruction if operands are not available
1013
152
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1014
152
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1015
152
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1016
178
    for (int I = 1; I <= 2; 
I++26
)
1017
167
      if (CMPI->getOperand(I).isReg()) {
1018
161
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1019
161
        if (Inst->getParent() == &MBB && 
Inst->getOpcode() != PPC::PHI147
)
1020
141
          return false;
1021
161
      }
1022
152
1023
152
    PredMBB = Pred1MBB;
1024
11
    MBBtoMoveCmp = Pred2MBB;
1025
11
    return true;
1026
375
  }
1027
375
1028
375
  return false;
1029
375
}
1030
1031
// This function will iterate over the input map containing a pair of TOC save
1032
// instruction and a flag. The flag will be set to false if the TOC save is
1033
// proven redundant. This function will erase from the basic block all the TOC
1034
// saves marked as redundant.
1035
bool PPCMIPeephole::eliminateRedundantTOCSaves(
1036
10.4k
    std::map<MachineInstr *, bool> &TOCSaves) {
1037
10.4k
  bool Simplified = false;
1038
10.4k
  int NumKept = 0;
1039
10.4k
  for (auto TOCSave : TOCSaves) {
1040
29
    if (!TOCSave.second) {
1041
21
      TOCSave.first->eraseFromParent();
1042
21
      RemoveTOCSave++;
1043
21
      Simplified = true;
1044
21
    } else {
1045
8
      NumKept++;
1046
8
    }
1047
29
  }
1048
10.4k
1049
10.4k
  if (NumKept > 1)
1050
1
    MultiTOCSaves++;
1051
10.4k
1052
10.4k
  return Simplified;
1053
10.4k
}
1054
1055
// If multiple conditional branches are executed based on the (essentially)
1056
// same comparison, we merge compare instructions into one and make multiple
1057
// conditional branches on this comparison.
1058
// For example,
1059
//   if (a == 0) { ... }
1060
//   else if (a < 0) { ... }
1061
// can be executed by one compare and two conditional branches instead of
1062
// two pairs of a compare and a conditional branch.
1063
//
1064
// This method merges two compare instructions in two MBBs and modifies the
1065
// compare and conditional branch instructions if needed.
1066
// For the above example, the input for this pass looks like:
1067
//   cmplwi r3, 0
1068
//   beq    0, .LBB0_3
1069
//   cmpwi  r3, -1
1070
//   bgt    0, .LBB0_4
1071
// So, before merging two compares, we need to modify these instructions as
1072
//   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
1073
//   beq    0, .LBB0_3
1074
//   cmpwi  r3, 0       ; greather than -1 means greater or equal to 0
1075
//   bge    0, .LBB0_4
1076
1077
10.4k
bool PPCMIPeephole::eliminateRedundantCompare(void) {
1078
10.4k
  bool Simplified = false;
1079
10.4k
1080
15.7k
  for (MachineBasicBlock &MBB2 : *MF) {
1081
15.7k
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1082
15.7k
1083
15.7k
    // For fully redundant case, we select two basic blocks MBB1 and MBB2
1084
15.7k
    // as an optimization target if
1085
15.7k
    // - both MBBs end with a conditional branch,
1086
15.7k
    // - MBB1 is the only predecessor of MBB2, and
1087
15.7k
    // - compare does not take a physical register as a operand in both MBBs.
1088
15.7k
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1089
15.7k
    //
1090
15.7k
    // As partially redundant case, we additionally handle if MBB2 has one
1091
15.7k
    // additional predecessor, which has only one successor (MBB2).
1092
15.7k
    // In this case, we move the compare instruction originally in MBB2 into
1093
15.7k
    // MBBtoMoveCmp. This partially redundant case is typically appear by
1094
15.7k
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1095
15.7k
    //
1096
15.7k
    // Overview of CFG of related basic blocks
1097
15.7k
    // Fully redundant case        Partially redundant case
1098
15.7k
    //   --------                   ----------------  --------
1099
15.7k
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
1100
15.7k
    //   --------                   ----------------  --------
1101
15.7k
    //      |    \                     (w/ 1 succ) \     |    \
1102
15.7k
    //      |     \                                 \    |     \
1103
15.7k
    //      |                                        \   |
1104
15.7k
    //   --------                                     --------
1105
15.7k
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
1106
15.7k
    //   -------- and 2 succ)                         -------- and 2 succ)
1107
15.7k
    //      |    \                                       |    \
1108
15.7k
    //      |     \                                      |     \
1109
15.7k
    //
1110
15.7k
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1111
15.5k
      continue;
1112
143
1113
143
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
1114
143
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1115
143
1116
143
    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
1117
143
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1118
143
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1119
143
1120
143
    // We cannot optimize an unsupported compare opcode or
1121
143
    // a mix of 32-bit and 64-bit comaprisons
1122
143
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1123
143
        
!isSupportedCmpOp(CMPI2->getOpcode())137
||
1124
143
        
is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode())136
)
1125
16
      continue;
1126
127
1127
127
    unsigned NewOpCode = 0;
1128
127
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1129
127
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1130
127
    bool SwapOperands = false;
1131
127
1132
127
    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1133
43
      // Typically, unsigned comparison is used for equality check, but
1134
43
      // we replace it with a signed comparison if the comparison
1135
43
      // to be merged is a signed comparison.
1136
43
      // In other cases of opcode mismatch, we cannot optimize this.
1137
43
1138
43
      // We cannot change opcode when comparing against an immediate
1139
43
      // if the most significant bit of the immediate is one
1140
43
      // due to the difference in sign extension.
1141
46
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1142
46
        if (!I->getOperand(2).isImm())
1143
16
          return false;
1144
30
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
1145
30
        return Imm < 0;
1146
30
      };
1147
43
1148
43
      if (isEqOrNe(BI2) && 
!CmpAgainstImmWithSignBit(CMPI2)27
&&
1149
43
          
CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode())25
)
1150
18
        NewOpCode = CMPI1->getOpcode();
1151
25
      else if (isEqOrNe(BI1) && 
!CmpAgainstImmWithSignBit(CMPI1)19
&&
1152
25
               
getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode()17
)
1153
11
        NewOpCode = CMPI2->getOpcode();
1154
14
      else continue;
1155
113
    }
1156
113
1157
113
    if (CMPI1->getOperand(2).isReg() && 
CMPI2->getOperand(2).isReg()37
) {
1158
37
      // In case of comparisons between two registers, these two registers
1159
37
      // must be same to merge two comparisons.
1160
37
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1161
37
                                         nullptr, nullptr, MRI);
1162
37
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1163
37
                                         nullptr, nullptr, MRI);
1164
37
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1165
37
                                         MBB1, &MBB2, MRI);
1166
37
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1167
37
                                         MBB1, &MBB2, MRI);
1168
37
1169
37
      if (Cmp1Operand1 == Cmp2Operand1 && 
Cmp1Operand2 == Cmp2Operand24
) {
1170
4
        // Same pair of registers in the same order; ready to merge as is.
1171
4
      }
1172
33
      else if (Cmp1Operand1 == Cmp2Operand2 && 
Cmp1Operand2 == Cmp2Operand112
) {
1173
10
        // Same pair of registers in different order.
1174
10
        // We reverse the predicate to merge compare instructions.
1175
10
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1176
10
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1177
10
        // In case of partial redundancy, we need to swap operands
1178
10
        // in another compare instruction.
1179
10
        SwapOperands = true;
1180
10
      }
1181
23
      else continue;
1182
76
    }
1183
76
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1184
76
      // In case of comparisons between a register and an immediate,
1185
76
      // the operand register must be same for two compare instructions.
1186
76
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1187
76
                                         nullptr, nullptr, MRI);
1188
76
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1189
76
                                         MBB1, &MBB2, MRI);
1190
76
      if (Cmp1Operand1 != Cmp2Operand1)
1191
39
        continue;
1192
37
1193
37
      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1194
37
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1195
37
1196
37
      // If immediate are not same, we try to adjust by changing predicate;
1197
37
      // e.g. GT imm means GE (imm+1).
1198
37
      if (Imm1 != Imm2 && 
(35
!isEqOrNe(BI2)35
||
!isEqOrNe(BI1)23
)) {
1199
30
        int Diff = Imm1 - Imm2;
1200
30
        if (Diff < -2 || 
Diff > 228
)
1201
2
          continue;
1202
28
1203
28
        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1204
28
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1205
28
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1206
28
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1207
28
        if (Diff == 2) {
1208
0
          if (PredToInc2 && PredToDec1) {
1209
0
            NewPredicate2 = PredToInc2;
1210
0
            NewPredicate1 = PredToDec1;
1211
0
            NewImm2++;
1212
0
            NewImm1--;
1213
0
          }
1214
0
        }
1215
28
        else if (Diff == 1) {
1216
20
          if (PredToInc2) {
1217
10
            NewImm2++;
1218
10
            NewPredicate2 = PredToInc2;
1219
10
          }
1220
10
          else if (PredToDec1) {
1221
10
            NewImm1--;
1222
10
            NewPredicate1 = PredToDec1;
1223
10
          }
1224
20
        }
1225
8
        else if (Diff == -1) {
1226
8
          if (PredToDec2) {
1227
0
            NewImm2--;
1228
0
            NewPredicate2 = PredToDec2;
1229
0
          }
1230
8
          else if (PredToInc1) {
1231
8
            NewImm1++;
1232
8
            NewPredicate1 = PredToInc1;
1233
8
          }
1234
8
        }
1235
0
        else if (Diff == -2) {
1236
0
          if (PredToDec2 && PredToInc1) {
1237
0
            NewPredicate2 = PredToDec2;
1238
0
            NewPredicate1 = PredToInc1;
1239
0
            NewImm2--;
1240
0
            NewImm1++;
1241
0
          }
1242
0
        }
1243
28
      }
1244
37
1245
37
      // We cannot merge two compares if the immediates are not same.
1246
37
      
if (35
NewImm2 != NewImm135
)
1247
5
        continue;
1248
44
    }
1249
44
1250
44
    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1251
44
    LLVM_DEBUG(CMPI1->dump());
1252
44
    LLVM_DEBUG(BI1->dump());
1253
44
    LLVM_DEBUG(CMPI2->dump());
1254
44
    LLVM_DEBUG(BI2->dump());
1255
44
1256
44
    // We adjust opcode, predicates and immediate as we determined above.
1257
44
    if (NewOpCode != 0 && 
NewOpCode != CMPI1->getOpcode()28
) {
1258
10
      CMPI1->setDesc(TII->get(NewOpCode));
1259
10
    }
1260
44
    if (NewPredicate1) {
1261
18
      BI1->getOperand(0).setImm(NewPredicate1);
1262
18
    }
1263
44
    if (NewPredicate2) {
1264
20
      BI2->getOperand(0).setImm(NewPredicate2);
1265
20
    }
1266
44
    if (NewImm1 != Imm1) {
1267
18
      CMPI1->getOperand(2).setImm(NewImm1);
1268
18
    }
1269
44
1270
44
    if (IsPartiallyRedundant) {
1271
2
      // We touch up the compare instruction in MBB2 and move it to
1272
2
      // a previous BB to handle partially redundant case.
1273
2
      if (SwapOperands) {
1274
0
        unsigned Op1 = CMPI2->getOperand(1).getReg();
1275
0
        unsigned Op2 = CMPI2->getOperand(2).getReg();
1276
0
        CMPI2->getOperand(1).setReg(Op2);
1277
0
        CMPI2->getOperand(2).setReg(Op1);
1278
0
      }
1279
2
      if (NewImm2 != Imm2)
1280
0
        CMPI2->getOperand(2).setImm(NewImm2);
1281
2
1282
6
      for (int I = 1; I <= 2; 
I++4
) {
1283
4
        if (CMPI2->getOperand(I).isReg()) {
1284
2
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1285
2
          if (Inst->getParent() != &MBB2)
1286
0
            continue;
1287
2
1288
2
          assert(Inst->getOpcode() == PPC::PHI &&
1289
2
                 "We cannot support if an operand comes from this BB.");
1290
2
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1291
2
          CMPI2->getOperand(I).setReg(SrcReg);
1292
2
        }
1293
4
      }
1294
2
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1295
2
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1296
2
1297
2
      DebugLoc DL = CMPI2->getDebugLoc();
1298
2
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1299
2
      BuildMI(MBB2, MBB2.begin(), DL,
1300
2
              TII->get(PPC::PHI), NewVReg)
1301
2
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1302
2
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1303
2
      BI2->getOperand(1).setReg(NewVReg);
1304
2
    }
1305
42
    else {
1306
42
      // We finally eliminate compare instruction in MBB2.
1307
42
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1308
42
      CMPI2->eraseFromParent();
1309
42
    }
1310
44
    BI2->getOperand(1).setIsKill(true);
1311
44
    BI1->getOperand(1).setIsKill(false);
1312
44
1313
44
    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1314
44
    LLVM_DEBUG(CMPI1->dump());
1315
44
    LLVM_DEBUG(BI1->dump());
1316
44
    LLVM_DEBUG(BI2->dump());
1317
44
    if (IsPartiallyRedundant) {
1318
2
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
1319
2
                        << printMBBReference(*MBBtoMoveCmp)
1320
2
                        << " to handle partial redundancy.\n");
1321
2
      LLVM_DEBUG(CMPI2->dump());
1322
2
    }
1323
44
1324
44
    Simplified = true;
1325
44
  }
1326
10.4k
1327
10.4k
  return Simplified;
1328
10.4k
}
1329
1330
// We miss the opportunity to emit an RLDIC when lowering jump tables
1331
// since ISEL sees only a single basic block. When selecting, the clear
1332
// and shift left will be in different blocks.
1333
799
bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
1334
799
  if (MI.getOpcode() != PPC::RLDICR)
1335
0
    return false;
1336
799
1337
799
  unsigned SrcReg = MI.getOperand(1).getReg();
1338
799
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1339
0
    return false;
1340
799
1341
799
  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1342
799
  if (SrcMI->getOpcode() != PPC::RLDICL)
1343
789
    return false;
1344
10
1345
10
  MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1346
10
  MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1347
10
  MachineOperand MOpSHMI = MI.getOperand(2);
1348
10
  MachineOperand MOpMEMI = MI.getOperand(3);
1349
10
  if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1350
10
        MOpMEMI.isImm()))
1351
0
    return false;
1352
10
1353
10
  uint64_t SHSrc = MOpSHSrc.getImm();
1354
10
  uint64_t MBSrc = MOpMBSrc.getImm();
1355
10
  uint64_t SHMI = MOpSHMI.getImm();
1356
10
  uint64_t MEMI = MOpMEMI.getImm();
1357
10
  uint64_t NewSH = SHSrc + SHMI;
1358
10
  uint64_t NewMB = MBSrc - SHMI;
1359
10
  if (NewMB > 63 || NewSH > 63)
1360
0
    return false;
1361
10
1362
10
  // The bits cleared with RLDICL are [0, MBSrc).
1363
10
  // The bits cleared with RLDICR are (MEMI, 63].
1364
10
  // After the sequence, the bits cleared are:
1365
10
  // [0, MBSrc-SHMI) and (MEMI, 63).
1366
10
  //
1367
10
  // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1368
10
  if ((63 - NewSH) != MEMI)
1369
0
    return false;
1370
10
1371
10
  LLVM_DEBUG(dbgs() << "Converting pair: ");
1372
10
  LLVM_DEBUG(SrcMI->dump());
1373
10
  LLVM_DEBUG(MI.dump());
1374
10
1375
10
  MI.setDesc(TII->get(PPC::RLDIC));
1376
10
  MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1377
10
  MI.getOperand(2).setImm(NewSH);
1378
10
  MI.getOperand(3).setImm(NewMB);
1379
10
1380
10
  LLVM_DEBUG(dbgs() << "To: ");
1381
10
  LLVM_DEBUG(MI.dump());
1382
10
  NumRotatesCollapsed++;
1383
10
  return true;
1384
10
}
1385
1386
// For case in LLVM IR
1387
// entry:
1388
//   %iconv = sext i32 %index to i64
1389
//   br i1 undef label %true, label %false
1390
// true:
1391
//   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1392
// ...
1393
// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1394
// different BBs when conducting instruction selection. We can do a peephole
1395
// optimization to combine these two instructions into extswsli after
1396
// instruction selection.
1397
bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1398
789
                                      MachineInstr *&ToErase) {
1399
789
  if (MI.getOpcode() != PPC::RLDICR)
1400
0
    return false;
1401
789
1402
789
  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1403
391
    return false;
1404
398
1405
398
  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1406
398
1407
398
  MachineOperand MOpSHMI = MI.getOperand(2);
1408
398
  MachineOperand MOpMEMI = MI.getOperand(3);
1409
398
  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1410
0
    return false;
1411
398
1412
398
  uint64_t SHMI = MOpSHMI.getImm();
1413
398
  uint64_t MEMI = MOpMEMI.getImm();
1414
398
  if (SHMI + MEMI != 63)
1415
2
    return false;
1416
396
1417
396
  unsigned SrcReg = MI.getOperand(1).getReg();
1418
396
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1419
0
    return false;
1420
396
1421
396
  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1422
396
  if (SrcMI->getOpcode() != PPC::EXTSW &&
1423
396
      
SrcMI->getOpcode() != PPC::EXTSW_32_64389
)
1424
389
    return false;
1425
7
1426
7
  // If the register defined by extsw has more than one use, combination is not
1427
7
  // needed.
1428
7
  if (!MRI->hasOneNonDBGUse(SrcReg))
1429
2
    return false;
1430
5
1431
5
  LLVM_DEBUG(dbgs() << "Combining pair: ");
1432
5
  LLVM_DEBUG(SrcMI->dump());
1433
5
  LLVM_DEBUG(MI.dump());
1434
5
1435
5
  MachineInstr *NewInstr =
1436
5
      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
1437
5
              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
1438
5
                                               : 
TII->get(PPC::EXTSWSLI_32_64)0
,
1439
5
              MI.getOperand(0).getReg())
1440
5
          .add(SrcMI->getOperand(1))
1441
5
          .add(MOpSHMI);
1442
5
  (void)NewInstr;
1443
5
1444
5
  LLVM_DEBUG(dbgs() << "TO: ");
1445
5
  LLVM_DEBUG(NewInstr->dump());
1446
5
  ++NumEXTSWAndSLDICombined;
1447
5
  ToErase = &MI;
1448
5
  // SrcMI, which is extsw, is of no use now, erase it.
1449
5
  SrcMI->eraseFromParent();
1450
5
  return true;
1451
5
}
1452
1453
} // end default namespace
1454
1455
101k
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
1456
101k
                      "PowerPC MI Peephole Optimization", false, false)
1457
101k
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
1458
101k
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1459
101k
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1460
101k
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
1461
                    "PowerPC MI Peephole Optimization", false, false)
1462
1463
char PPCMIPeephole::ID = 0;
1464
FunctionPass*
1465
1.66k
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1466