Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines a pattern matching instruction selector for PowerPC,
10
// converting from a legalized dag to a PPC dag.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "MCTargetDesc/PPCMCTargetDesc.h"
15
#include "MCTargetDesc/PPCPredicates.h"
16
#include "PPC.h"
17
#include "PPCISelLowering.h"
18
#include "PPCMachineFunctionInfo.h"
19
#include "PPCSubtarget.h"
20
#include "PPCTargetMachine.h"
21
#include "llvm/ADT/APInt.h"
22
#include "llvm/ADT/DenseMap.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/ADT/SmallPtrSet.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/Statistic.h"
27
#include "llvm/Analysis/BranchProbabilityInfo.h"
28
#include "llvm/CodeGen/FunctionLoweringInfo.h"
29
#include "llvm/CodeGen/ISDOpcodes.h"
30
#include "llvm/CodeGen/MachineBasicBlock.h"
31
#include "llvm/CodeGen/MachineFunction.h"
32
#include "llvm/CodeGen/MachineInstrBuilder.h"
33
#include "llvm/CodeGen/MachineRegisterInfo.h"
34
#include "llvm/CodeGen/SelectionDAG.h"
35
#include "llvm/CodeGen/SelectionDAGISel.h"
36
#include "llvm/CodeGen/SelectionDAGNodes.h"
37
#include "llvm/CodeGen/TargetInstrInfo.h"
38
#include "llvm/CodeGen/TargetRegisterInfo.h"
39
#include "llvm/CodeGen/ValueTypes.h"
40
#include "llvm/IR/BasicBlock.h"
41
#include "llvm/IR/DebugLoc.h"
42
#include "llvm/IR/Function.h"
43
#include "llvm/IR/GlobalValue.h"
44
#include "llvm/IR/InlineAsm.h"
45
#include "llvm/IR/InstrTypes.h"
46
#include "llvm/IR/Module.h"
47
#include "llvm/Support/Casting.h"
48
#include "llvm/Support/CodeGen.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Compiler.h"
51
#include "llvm/Support/Debug.h"
52
#include "llvm/Support/ErrorHandling.h"
53
#include "llvm/Support/KnownBits.h"
54
#include "llvm/Support/MachineValueType.h"
55
#include "llvm/Support/MathExtras.h"
56
#include "llvm/Support/raw_ostream.h"
57
#include <algorithm>
58
#include <cassert>
59
#include <cstdint>
60
#include <iterator>
61
#include <limits>
62
#include <memory>
63
#include <new>
64
#include <tuple>
65
#include <utility>
66
67
using namespace llvm;
68
69
#define DEBUG_TYPE "ppc-codegen"
70
71
STATISTIC(NumSextSetcc,
72
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
73
STATISTIC(NumZextSetcc,
74
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
75
STATISTIC(SignExtensionsAdded,
76
          "Number of sign extensions for compare inputs added.");
77
STATISTIC(ZeroExtensionsAdded,
78
          "Number of zero extensions for compare inputs added.");
79
STATISTIC(NumLogicOpsOnComparison,
80
          "Number of logical ops on i1 values calculated in GPR.");
81
STATISTIC(OmittedForNonExtendUses,
82
          "Number of compares not eliminated as they have non-extending uses.");
83
STATISTIC(NumP9Setb,
84
          "Number of compares lowered to setb.");
85
86
// FIXME: Remove this once the bug has been fixed!
87
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
88
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
89
90
static cl::opt<bool>
91
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
92
                       cl::desc("use aggressive ppc isel for bit permutations"),
93
                       cl::Hidden);
94
static cl::opt<bool> BPermRewriterNoMasking(
95
    "ppc-bit-perm-rewriter-stress-rotates",
96
    cl::desc("stress rotate selection in aggressive ppc isel for "
97
             "bit permutations"),
98
    cl::Hidden);
99
100
static cl::opt<bool> EnableBranchHint(
101
  "ppc-use-branch-hint", cl::init(true),
102
    cl::desc("Enable static hinting of branches on ppc"),
103
    cl::Hidden);
104
105
static cl::opt<bool> EnableTLSOpt(
106
  "ppc-tls-opt", cl::init(true),
107
    cl::desc("Enable tls optimization peephole"),
108
    cl::Hidden);
109
110
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
111
  ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
112
  ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
113
114
static cl::opt<ICmpInGPRType> CmpInGPR(
115
  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
116
  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
117
  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
118
             clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
119
             clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
120
             clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
121
             clEnumValN(ICGPR_NonExtIn, "nonextin",
122
                        "Only comparisons where inputs don't need [sz]ext."),
123
             clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
124
             clEnumValN(ICGPR_ZextI32, "zexti32",
125
                        "Only i32 comparisons with zext result."),
126
             clEnumValN(ICGPR_ZextI64, "zexti64",
127
                        "Only i64 comparisons with zext result."),
128
             clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
129
             clEnumValN(ICGPR_SextI32, "sexti32",
130
                        "Only i32 comparisons with sext result."),
131
             clEnumValN(ICGPR_SextI64, "sexti64",
132
                        "Only i64 comparisons with sext result.")));
133
namespace {
134
135
  //===--------------------------------------------------------------------===//
136
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel {
    const PPCTargetMachine &TM;

    // The three members below are reassigned at the top of every
    // runOnMachineFunction call.  They carry default initializers so that
    // they never hold indeterminate values before the first function is
    // processed.
    const PPCSubtarget *PPCSubTarget = nullptr;
    const PPCTargetLowering *PPCLowering = nullptr;
    unsigned GlobalBaseReg = 0;

  public:
    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), TM(tm) {}

    /// Refresh the per-function state, run the generic selector and, when the
    /// subtarget is not using the SVR4 ABI, insert the VRSAVE handling code.
    /// Always returns true.
    bool runOnMachineFunction(MachineFunction &MF) override {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
      PPCLowering = PPCSubTarget->getTargetLowering();
      SelectionDAGISel::runOnMachineFunction(MF);

      if (!PPCSubTarget->isSVR4ABI())
        InsertVRSaveCode(MF);

      return true;
    }

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    }

    /// getI64Imm - Return a target constant with the specified value, of type
    /// i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    }

    /// getSmallIPtrImm - Return a target constant of pointer type.
    inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(
          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    }

    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    /// rotate-and-mask operation; on success SH, MB and ME are filled in.
    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                unsigned &SH, unsigned &MB, unsigned &ME);

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register.  Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

    // Select - Convert the specified operand from a target-independent to a
    // target-specific node if it hasn't already been changed.
    void Select(SDNode *N) override;

    bool tryBitfieldInsert(SDNode *N);
    bool tryBitPermutation(SDNode *N);
    bool tryIntCompareInGPR(SDNode *N);

    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
    // an X-Form load instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormLoad(LoadSDNode *N);
    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
    // an X-Form store instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormStore(StoreSDNode *N);
    /// SelectCC - Select a comparison of the specified values with the
    /// specified condition code, returning the CR# of the expression.
    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                     const SDLoc &dl);

    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    /// immediate field.  Note that the operand at this point is already the
    /// result of a prior SelectAddressRegImm call.
    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
      if (N.getOpcode() == ISD::TargetConstant ||
          N.getOpcode() == ISD::TargetGlobalAddress) {
        Out = N;
        return true;
      }

      return false;
    }

    /// SelectAddrIdx - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is D.
    /// The last parameter \p 0 means associated D form has no requirement for
    /// 16 bit signed displacement.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
    }

    /// SelectAddrIdxX4 - Given the specified address, check to see if it can
    /// be represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DS.
    /// The last parameter \p 4 means associated DS form 16 bit signed
    /// displacement must be a multiple of 4.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
    }

    /// SelectAddrIdxX16 - Given the specified address, check to see if it can
    /// be represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DQ.
    /// The last parameter \p 16 means associated DQ form 16 bit signed
    /// displacement must be a multiple of 16.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
    }

    /// SelectAddrIdxOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    }

    /// SelectAddrImm - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement [r+imm].
    /// The last parameter \p 0 means D form has no requirement for 16 bit
    /// signed displacement.
    bool SelectAddrImm(SDValue N, SDValue &Disp,
                       SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
    }

    /// SelectAddrImmX4 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 4 (last parameter). Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
    }

    /// SelectAddrImmX16 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 16 (last parameter). Suitable for use by STXV and friends.
    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
    }

    // Select an address into a single register.
    bool SelectAddr(SDValue N, SDValue &Base) {
      Base = N;
      return true;
    }

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.  It is always correct to compute the value into
    /// a register.  The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
    ///
    /// For every handled constraint the operand is wrapped in a
    /// COPY_TO_REGCLASS node, appended to \p OutOps, and false is returned.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch(ConstraintID) {
      default:
        errs() << "ConstraintID: " << ConstraintID << "\n";
        llvm_unreachable("Unexpected asm memory constraint");
      case InlineAsm::Constraint_es:
      case InlineAsm::Constraint_i:
      case InlineAsm::Constraint_m:
      case InlineAsm::Constraint_o:
      case InlineAsm::Constraint_Q:
      case InlineAsm::Constraint_Z:
      case InlineAsm::Constraint_Zy:
        // We need to make sure that this one operand does not end up in r0
        // (because we might end up lowering this as 0(%op)).
        const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
        SDLoc dl(Op);
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);

        OutOps.push_back(NewOp);
        return false;
      }
      return true;
    }

    void InsertVRSaveCode(MachineFunction &MF);

    StringRef getPassName() const override {
      return "PowerPC DAG->DAG Pattern Instruction Selection";
    }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
    bool trySETCC(SDNode *N);

    void PeepholePPC64();
    void PeepholePPC64ZExt();
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);

    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
    void transferMemOperands(SDNode *N, SDNode *Result);
  };
360
361
} // end anonymous namespace
362
363
/// InsertVRSaveCode - Once the entire function has been instruction selected,
364
/// all virtual registers are created and all machine instructions are built,
365
/// check to see if we need to save/restore VRSAVE.  If so, do it.
366
24
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
367
24
  // Check to see if this function uses vector registers, which means we have to
368
24
  // save and restore the VRSAVE register and update it with the regs we use.
369
24
  //
370
24
  // In this case, there will be virtual registers of vector type created
371
24
  // by the scheduler.  Detect them now.
372
24
  bool HasVectorVReg = false;
373
52
  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; 
++i28
) {
374
28
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
375
28
    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
376
0
      HasVectorVReg = true;
377
0
      break;
378
0
    }
379
28
  }
380
24
  if (!HasVectorVReg) return;  // nothing to do.
381
0
382
0
  // If we have a vector register, we want to emit code into the entry and exit
383
0
  // blocks to save and restore the VRSAVE register.  We do this here (instead
384
0
  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
385
0
  //
386
0
  // 1. This (trivially) reduces the load on the register allocator, by not
387
0
  //    having to represent the live range of the VRSAVE register.
388
0
  // 2. This (more significantly) allows us to create a temporary virtual
389
0
  //    register to hold the saved VRSAVE value, allowing this temporary to be
390
0
  //    register allocated, instead of forcing it to be spilled to the stack.
391
0
392
0
  // Create two vregs - one to hold the VRSAVE register that is live-in to the
393
0
  // function and one for the value after having bits or'd into it.
394
0
  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
395
0
  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
396
0
397
0
  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
398
0
  MachineBasicBlock &EntryBB = *Fn.begin();
399
0
  DebugLoc dl;
400
0
  // Emit the following code into the entry block:
401
0
  // InVRSAVE = MFVRSAVE
402
0
  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
403
0
  // MTVRSAVE UpdatedVRSAVE
404
0
  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
405
0
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
406
0
  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
407
0
          UpdatedVRSAVE).addReg(InVRSAVE);
408
0
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
409
0
410
0
  // Find all return blocks, outputting a restore in each epilog.
411
0
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
412
0
    if (BB->isReturnBlock()) {
413
0
      IP = BB->end(); --IP;
414
0
415
0
      // Skip over all terminator instructions, which are part of the return
416
0
      // sequence.
417
0
      MachineBasicBlock::iterator I2 = IP;
418
0
      while (I2 != BB->begin() && (--I2)->isTerminator())
419
0
        IP = I2;
420
0
421
0
      // Emit: MTVRSAVE InVRSave
422
0
      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
423
0
    }
424
0
  }
425
0
}
426
427
/// getGlobalBaseReg - Output the instructions required to put the
428
/// base address to use for accessing globals into a register.
429
///
430
43
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
431
43
  if (!GlobalBaseReg) {
432
27
    const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
433
27
    // Insert the set of GlobalBaseReg into the first MBB of the function
434
27
    MachineBasicBlock &FirstMBB = MF->front();
435
27
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
436
27
    const Module *M = MF->getFunction().getParent();
437
27
    DebugLoc dl;
438
27
439
27
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
440
23
      if (PPCSubTarget->isTargetELF()) {
441
23
        GlobalBaseReg = PPC::R30;
442
23
        if (!PPCSubTarget->isSecurePlt() &&
443
23
            
M->getPICLevel() == PICLevel::SmallPIC13
) {
444
1
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
445
1
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
446
1
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
447
22
        } else {
448
22
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
449
22
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
450
22
          unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
451
22
          BuildMI(FirstMBB, MBBI, dl,
452
22
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
453
22
                  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
454
22
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
455
22
        }
456
23
      } else {
457
0
        GlobalBaseReg =
458
0
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
459
0
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
460
0
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
461
0
      }
462
23
    } else {
463
4
      // We must ensure that this sequence is dominated by the prologue.
464
4
      // FIXME: This is a bit of a big hammer since we don't get the benefits
465
4
      // of shrink-wrapping whenever we emit this instruction. Considering
466
4
      // this is used in any function where we emit a jump table, this may be
467
4
      // a significant limitation. We should consider inserting this in the
468
4
      // block where it is used and then commoning this sequence up if it
469
4
      // appears in multiple places.
470
4
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
471
4
      // MovePCtoLR8.
472
4
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
473
4
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
474
4
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
475
4
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
476
4
    }
477
27
  }
478
43
  return CurDAG->getRegister(GlobalBaseReg,
479
43
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
480
43
      .getNode();
481
43
}
482
483
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
484
/// operand. If so Imm will receive the 32-bit value.
485
3.05k
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
486
3.05k
  if (N->getOpcode() == ISD::Constant && 
N->getValueType(0) == MVT::i322.00k
) {
487
1.43k
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
488
1.43k
    return true;
489
1.43k
  }
490
1.62k
  return false;
491
1.62k
}
492
493
/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
494
/// operand.  If so Imm will receive the 64-bit value.
495
1.45k
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
496
1.45k
  if (N->getOpcode() == ISD::Constant && 
N->getValueType(0) == MVT::i64648
) {
497
563
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
498
563
    return true;
499
563
  }
500
891
  return false;
501
891
}
502
503
// isInt32Immediate - This method tests to see if a constant operand.
504
// If so Imm will receive the 32 bit value.
505
2.76k
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
506
2.76k
  return isInt32Immediate(N.getNode(), Imm);
507
2.76k
}
508
509
/// isInt64Immediate - This method tests to see if the value is a 64-bit
510
/// constant operand. If so Imm will receive the 64-bit value.
511
494
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
512
494
  return isInt64Immediate(N.getNode(), Imm);
513
494
}
514
515
static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
516
898
                              const SDValue &DestMBB) {
517
898
  assert(isa<BasicBlockSDNode>(DestMBB));
518
898
519
898
  if (!FuncInfo->BPI) 
return PPC::BR_NO_HINT53
;
520
845
521
845
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
522
845
  const Instruction *BBTerm = BB->getTerminator();
523
845
524
845
  if (BBTerm->getNumSuccessors() != 2) 
return PPC::BR_NO_HINT37
;
525
808
526
808
  const BasicBlock *TBB = BBTerm->getSuccessor(0);
527
808
  const BasicBlock *FBB = BBTerm->getSuccessor(1);
528
808
529
808
  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
530
808
  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
531
808
532
808
  // We only want to handle cases which are easy to predict at static time, e.g.
533
808
  // C++ throw statement, that is very likely not taken, or calling never
534
808
  // returned function, e.g. stdlib exit(). So we set Threshold to filter
535
808
  // unwanted cases.
536
808
  //
537
808
  // Below is LLVM branch weight table, we only want to handle case 1, 2
538
808
  //
539
808
  // Case                  Taken:Nontaken  Example
540
808
  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
541
808
  // 2. Invoke-terminating 1:1048575
542
808
  // 3. Coldblock          4:64            __builtin_expect
543
808
  // 4. Loop Branch        124:4           For loop
544
808
  // 5. PH/ZH/FPH          20:12
545
808
  const uint32_t Threshold = 10000;
546
808
547
808
  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
548
757
    return PPC::BR_NO_HINT;
549
51
550
51
  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName()
551
51
                    << "::" << BB->getName() << "'\n"
552
51
                    << " -> " << TBB->getName() << ": " << TProb << "\n"
553
51
                    << " -> " << FBB->getName() << ": " << FProb << "\n");
554
51
555
51
  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
556
51
557
51
  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
558
51
  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
559
51
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
560
30
    std::swap(TProb, FProb);
561
51
562
51
  return (TProb > FProb) ? 
PPC::BR_TAKEN_HINT36
:
PPC::BR_NONTAKEN_HINT15
;
563
51
}
564
565
// isOpcWithIntImmediate - This method tests to see if the node is a specific
566
// opcode and that it has a immediate integer right operand.
567
// If so Imm will receive the 32 bit value.
568
225
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
569
225
  return N->getOpcode() == Opc
570
225
         && 
isInt32Immediate(N->getOperand(1).getNode(), Imm)6
;
571
225
}
572
573
936
/// selectFrameIndex - Select SN as an ADDI (i32 result) or ADDI8 (any other
/// result type) of the target frame index taken from the FrameIndexSDNode N
/// plus the pointer-typed immediate Offset.  SN is morphed in place when it
/// has a single use and is otherwise replaced by a new machine node.
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);
  const EVT VT = N->getValueType(0);
  const int FrameIdx = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FrameIdx, VT);

  unsigned Opc = PPC::ADDI8;
  if (VT == MVT::i32)
    Opc = PPC::ADDI;

  if (!SN->hasOneUse()) {
    SDNode *AddNode =
        CurDAG->getMachineNode(Opc, dl, VT, TFI, getSmallIPtrImm(Offset, dl));
    ReplaceNode(SN, AddNode);
    return;
  }

  CurDAG->SelectNodeTo(SN, Opc, VT, TFI, getSmallIPtrImm(Offset, dl));
}
585
586
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
587
                                      bool isShiftMask, unsigned &SH,
588
442
                                      unsigned &MB, unsigned &ME) {
589
442
  // Don't even go down this path for i64, since different logic will be
590
442
  // necessary for rldicl/rldicr/rldimi.
591
442
  if (N->getValueType(0) != MVT::i32)
592
0
    return false;
593
442
594
442
  unsigned Shift  = 32;
595
442
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
596
442
  unsigned Opcode = N->getOpcode();
597
442
  if (N->getNumOperands() != 2 ||
598
442
      
!isInt32Immediate(N->getOperand(1).getNode(), Shift)283
||
(Shift > 31)23
)
599
431
    return false;
600
11
601
11
  if (Opcode == ISD::SHL) {
602
0
    // apply shift left to mask if it comes first
603
0
    if (isShiftMask) Mask = Mask << Shift;
604
0
    // determine which bits are made indeterminant by shift
605
0
    Indeterminant = ~(0xFFFFFFFFu << Shift);
606
11
  } else if (Opcode == ISD::SRL) {
607
0
    // apply shift right to mask if it comes first
608
0
    if (isShiftMask) Mask = Mask >> Shift;
609
0
    // determine which bits are made indeterminant by shift
610
0
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
611
0
    // adjust for the left rotate
612
0
    Shift = 32 - Shift;
613
11
  } else if (Opcode == ISD::ROTL) {
614
0
    Indeterminant = 0;
615
11
  } else {
616
11
    return false;
617
11
  }
618
0
619
0
  // if the mask doesn't intersect any Indeterminant bits
620
0
  if (Mask && !(Mask & Indeterminant)) {
621
0
    SH = Shift & 31;
622
0
    // make sure the mask is still a mask (wrap arounds may not be)
623
0
    return isRunOfOnes(Mask, MB, ME);
624
0
  }
625
0
  return false;
626
0
}
627
628
1.76k
/// tryTLSXFormStore - If the store's base pointer is a PPCISD::ADD_TLS and its
/// offset is undef, replace the store with an ST*XTLS machine node that takes
/// the two ADD_TLS operands directly.  The opcode is chosen from the memory
/// type (i8/i16/i32/i64) and from whether the stored value is an i32.
/// Returns true when the store was replaced, false otherwise.
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  if (!ST->getOffset().isUndef())
    return false;

  SDLoc dl(ST);
  const EVT MemVT = ST->getMemoryVT();
  const bool ValueIsI32 = ST->getValue().getValueType() == MVT::i32;

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    Opcode = ValueIsI32 ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    Opcode = ValueIsI32 ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    Opcode = ValueIsI32 ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::STDXTLS;
    break;
  default:
    // Any other memory type is left to the regular selection code.
    return false;
  }

  // Operands: stored value, the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   ST->getChain()};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, ST->getVTList(), Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}
670
671
2.85k
/// Try to select the load \p LD as one of the *XTLS load machine opcodes.
///
/// This only applies when the load's base pointer is a PPCISD::ADD_TLS node
/// and the load has no (undef) offset operand. The opcode is chosen from the
/// memory type being loaded and from whether the loaded result is i32.
///
/// \returns true if \p LD was replaced by a machine node, false otherwise.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Addr = LD->getBasePtr();
  if (Addr.getOpcode() != PPCISD::ADD_TLS || !LD->getOffset().isUndef())
    return false;

  SDLoc dl(LD);
  // The 32-bit flavours of the opcodes are used when the result is an i32.
  const bool LoadsI32Reg = LD->getValueType(0) == MVT::i32;

  unsigned Opc;
  switch (LD->getMemoryVT().getSimpleVT().SimpleTy) {
  case MVT::i8:
    if (LoadsI32Reg)
      Opc = PPC::LBZXTLS_32;
    else
      Opc = PPC::LBZXTLS;
    break;
  case MVT::i16:
    if (LoadsI32Reg)
      Opc = PPC::LHZXTLS_32;
    else
      Opc = PPC::LHZXTLS;
    break;
  case MVT::i32:
    if (LoadsI32Reg)
      Opc = PPC::LWZXTLS_32;
    else
      Opc = PPC::LWZXTLS;
    break;
  case MVT::i64:
    Opc = PPC::LDXTLS;
    break;
  default:
    // No matching opcode for this memory type.
    return false;
  }

  // Operands: the two ADD_TLS operands, then the chain.
  SDValue Ops[] = {Addr.getOperand(0), Addr.getOperand(1), LD->getChain()};
  SDNode *NewLoad = CurDAG->getMachineNode(Opc, dl, LD->getVTList(), Ops);
  transferMemOperands(LD, NewLoad);
  ReplaceNode(LD, NewLoad);
  return true;
}
711
712
/// Turn an or of two masked values into the rotate left word immediate then
713
/// mask insert (rlwimi) instruction.
714
122
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
715
122
  SDValue Op0 = N->getOperand(0);
716
122
  SDValue Op1 = N->getOperand(1);
717
122
  SDLoc dl(N);
718
122
719
122
  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
720
122
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);
721
122
722
122
  unsigned TargetMask = LKnown.Zero.getZExtValue();
723
122
  unsigned InsertMask = RKnown.Zero.getZExtValue();
724
122
725
122
  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
726
7
    unsigned Op0Opc = Op0.getOpcode();
727
7
    unsigned Op1Opc = Op1.getOpcode();
728
7
    unsigned Value, SH = 0;
729
7
    TargetMask = ~TargetMask;
730
7
    InsertMask = ~InsertMask;
731
7
732
7
    // If the LHS has a foldable shift and the RHS does not, then swap it to the
733
7
    // RHS so that we can fold the shift into the insert.
734
7
    if (Op0Opc == ISD::AND && 
Op1Opc == ISD::AND4
) {
735
0
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
736
0
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
737
0
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
738
0
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
739
0
          std::swap(Op0, Op1);
740
0
          std::swap(Op0Opc, Op1Opc);
741
0
          std::swap(TargetMask, InsertMask);
742
0
        }
743
0
      }
744
7
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
745
1
      if (Op1Opc == ISD::AND && 
Op1.getOperand(0).getOpcode() != ISD::SHL0
&&
746
1
          
Op1.getOperand(0).getOpcode() != ISD::SRL0
) {
747
0
        std::swap(Op0, Op1);
748
0
        std::swap(Op0Opc, Op1Opc);
749
0
        std::swap(TargetMask, InsertMask);
750
0
      }
751
1
    }
752
7
753
7
    unsigned MB, ME;
754
7
    if (isRunOfOnes(InsertMask, MB, ME)) {
755
5
      if ((Op1Opc == ISD::SHL || 
Op1Opc == ISD::SRL4
) &&
756
5
          
isInt32Immediate(Op1.getOperand(1), Value)1
) {
757
1
        Op1 = Op1.getOperand(0);
758
1
        SH  = (Op1Opc == ISD::SHL) ? Value : 
32 - Value0
;
759
1
      }
760
5
      if (Op1Opc == ISD::AND) {
761
0
       // The AND mask might not be a constant, and we need to make sure that
762
0
       // if we're going to fold the masking with the insert, all bits not
763
0
       // know to be zero in the mask are known to be one.
764
0
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
765
0
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
766
0
767
0
        unsigned SHOpc = Op1.getOperand(0).getOpcode();
768
0
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
769
0
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
770
0
          // Note that Value must be in range here (less than 32) because
771
0
          // otherwise there would not be any bits set in InsertMask.
772
0
          Op1 = Op1.getOperand(0).getOperand(0);
773
0
          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
774
0
        }
775
0
      }
776
5
777
5
      SH &= 31;
778
5
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
779
5
                          getI32Imm(ME, dl) };
780
5
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
781
5
      return true;
782
5
    }
783
117
  }
784
117
  return false;
785
117
}
786
787
// Predict the number of instructions that would be generated by calling
788
// selectI64Imm(N).
789
17.4k
static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
790
17.4k
  // Assume no remaining bits.
791
17.4k
  unsigned Remainder = 0;
792
17.4k
  // Assume no shift required.
793
17.4k
  unsigned Shift = 0;
794
17.4k
795
17.4k
  // If it can't be represented as a 32 bit value.
796
17.4k
  if (!isInt<32>(Imm)) {
797
11.9k
    Shift = countTrailingZeros<uint64_t>(Imm);
798
11.9k
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
799
11.9k
800
11.9k
    // If the shifted value fits 32 bits.
801
11.9k
    if (isInt<32>(ImmSh)) {
802
1.07k
      // Go with the shifted value.
803
1.07k
      Imm = ImmSh;
804
10.9k
    } else {
805
10.9k
      // Still stuck with a 64 bit value.
806
10.9k
      Remainder = Imm;
807
10.9k
      Shift = 32;
808
10.9k
      Imm >>= 32;
809
10.9k
    }
810
11.9k
  }
811
17.4k
812
17.4k
  // Intermediate operand.
813
17.4k
  unsigned Result = 0;
814
17.4k
815
17.4k
  // Handle first 32 bits.
816
17.4k
  unsigned Lo = Imm & 0xFFFF;
817
17.4k
818
17.4k
  // Simple value.
819
17.4k
  if (isInt<16>(Imm)) {
820
6.73k
    // Just the Lo bits.
821
6.73k
    ++Result;
822
10.6k
  } else if (Lo) {
823
9.15k
    // Handle the Hi bits and Lo bits.
824
9.15k
    Result += 2;
825
9.15k
  } else {
826
1.51k
    // Just the Hi bits.
827
1.51k
    ++Result;
828
1.51k
  }
829
17.4k
830
17.4k
  // If no shift, we're done.
831
17.4k
  if (!Shift) 
return Result5.41k
;
832
11.9k
833
11.9k
  // If Hi word == Lo word,
834
11.9k
  // we can use rldimi to insert the Lo word into Hi word.
835
11.9k
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
836
3.28k
    ++Result;
837
3.28k
    return Result;
838
3.28k
  }
839
8.70k
840
8.70k
  // Shift for next step if the upper 32-bits were not zero.
841
8.70k
  if (Imm)
842
8.67k
    ++Result;
843
8.70k
844
8.70k
  // Add in the last bits as required.
845
8.70k
  if ((Remainder >> 16) & 0xFFFF)
846
6.24k
    ++Result;
847
8.70k
  if (Remainder & 0xFFFF)
848
6.71k
    ++Result;
849
8.70k
850
8.70k
  return Result;
851
8.70k
}
852
853
13.2k
// Rotate the 64-bit value Imm left by R bits.
//
// The rotation amount is reduced modulo 64, so R == 0 (and any multiple of
// 64) returns Imm unchanged. Without this, a zero rotation would evaluate
// Imm >> 64, which is an undefined shift in C++.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
856
857
1.48k
static unsigned selectI64ImmInstrCount(int64_t Imm) {
858
1.48k
  unsigned Count = selectI64ImmInstrCountDirect(Imm);
859
1.48k
860
1.48k
  // If the instruction count is 1 or 2, we do not need further analysis
861
1.48k
  // since rotate + load constant requires at least 2 instructions.
862
1.48k
  if (Count <= 2)
863
1.39k
    return Count;
864
92
865
5.79k
  
for (unsigned r = 1; 92
r < 63;
++r5.70k
) {
866
5.70k
    uint64_t RImm = Rot64(Imm, r);
867
5.70k
    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
868
5.70k
    Count = std::min(Count, RCount);
869
5.70k
870
5.70k
    // See comments in selectI64Imm for an explanation of the logic below.
871
5.70k
    unsigned LS = findLastSet(RImm);
872
5.70k
    if (LS != r-1)
873
5.66k
      continue;
874
40
875
40
    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
876
40
    uint64_t RImmWithOnes = RImm | OnesMask;
877
40
878
40
    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
879
40
    Count = std::min(Count, RCount);
880
40
  }
881
92
882
92
  return Count;
883
92
}
884
885
// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
886
// (above) needs to be kept in sync with this function.
887
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
888
2.43k
                                  int64_t Imm) {
889
2.43k
  // Assume no remaining bits.
890
2.43k
  unsigned Remainder = 0;
891
2.43k
  // Assume no shift required.
892
2.43k
  unsigned Shift = 0;
893
2.43k
894
2.43k
  // If it can't be represented as a 32 bit value.
895
2.43k
  if (!isInt<32>(Imm)) {
896
148
    Shift = countTrailingZeros<uint64_t>(Imm);
897
148
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
898
148
899
148
    // If the shifted value fits 32 bits.
900
148
    if (isInt<32>(ImmSh)) {
901
66
      // Go with the shifted value.
902
66
      Imm = ImmSh;
903
82
    } else {
904
82
      // Still stuck with a 64 bit value.
905
82
      Remainder = Imm;
906
82
      Shift = 32;
907
82
      Imm >>= 32;
908
82
    }
909
148
  }
910
2.43k
911
2.43k
  // Intermediate operand.
912
2.43k
  SDNode *Result;
913
2.43k
914
2.43k
  // Handle first 32 bits.
915
2.43k
  unsigned Lo = Imm & 0xFFFF;
916
2.43k
  unsigned Hi = (Imm >> 16) & 0xFFFF;
917
2.43k
918
2.43k
  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
919
642
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
920
642
  };
921
2.43k
922
2.43k
  // Simple value.
923
2.43k
  if (isInt<16>(Imm)) {
924
2.26k
    uint64_t SextImm = SignExtend64(Lo, 16);
925
2.26k
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
926
2.26k
    // Just the Lo bits.
927
2.26k
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
928
2.26k
  } else 
if (172
Lo172
) {
929
128
    // Handle the Hi bits.
930
128
    unsigned OpC = Hi ? 
PPC::LIS8110
:
PPC::LI818
;
931
128
    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
932
128
    // And Lo bits.
933
128
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
934
128
                                    SDValue(Result, 0), getI32Imm(Lo));
935
128
  } else {
936
44
    // Just the Hi bits.
937
44
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
938
44
  }
939
2.43k
940
2.43k
  // If no shift, we're done.
941
2.43k
  if (!Shift) 
return Result2.28k
;
942
148
943
148
  // If Hi word == Lo word,
944
148
  // we can use rldimi to insert the Lo word into Hi word.
945
148
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
946
33
    SDValue Ops[] =
947
33
      { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
948
33
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
949
33
  }
950
115
951
115
  // Shift for next step if the upper 32-bits were not zero.
952
115
  if (Imm) {
953
110
    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
954
110
                                    SDValue(Result, 0),
955
110
                                    getI32Imm(Shift),
956
110
                                    getI32Imm(63 - Shift));
957
110
  }
958
115
959
115
  // Add in the last bits as required.
960
115
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
961
29
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
962
29
                                    SDValue(Result, 0), getI32Imm(Hi));
963
29
  }
964
115
  if ((Lo = Remainder & 0xFFFF)) {
965
27
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
966
27
                                    SDValue(Result, 0), getI32Imm(Lo));
967
27
  }
968
115
969
115
  return Result;
970
115
}
971
972
// Materialize the 64-bit constant Imm, choosing between the direct sequence
// (selectI64ImmDirect) and "materialize a rotated value, then rotate it back
// with rldicr", whichever selectI64ImmInstrCountDirect predicts to be shorter.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
                            int64_t Imm) {
  unsigned BestCount = selectI64ImmInstrCountDirect(Imm);

  // If the instruction count is 1 or 2, we do not need further analysis
  // since rotate + load constant requires at least 2 instructions.
  if (BestCount <= 2)
    return selectI64ImmDirect(CurDAG, dl, Imm);

  // Best rotated candidate found so far. BestRot == 0 means "none"; the other
  // two are only read once BestRot has been set.
  unsigned BestRot = 0;
  int64_t BestImm;
  unsigned BestMaskEnd;

  for (unsigned r = 1; r < 63; ++r) {
    const uint64_t Rotated = Rot64(Imm, r);

    // Plain rotation: materialize Rotated, rotate back, keep all 64 bits.
    const unsigned PlainCount = selectI64ImmInstrCountDirect(Rotated) + 1;
    if (PlainCount < BestCount) {
      BestCount = PlainCount;
      BestRot = r;
      BestImm = Rotated;
      BestMaskEnd = 63;
    }

    // If the immediate to generate has many trailing zeros, it might be
    // worthwhile to generate a rotated value with too many leading ones
    // (because that's free with li/lis's sign-extension semantics), and then
    // mask them off after rotation.

    const unsigned LS = findLastSet(Rotated);
    // We're adding (63-LS) higher-order ones, and we expect to mask them off
    // after performing the inverse rotation by (64-r). So we need that:
    //   63-LS == 64-r => LS == r-1
    if (LS != r - 1)
      continue;

    const uint64_t HighOnes = ~UINT64_C(0) << (LS + 1);
    const uint64_t RotatedWithOnes = Rotated | HighOnes;

    const unsigned OnesCount =
        selectI64ImmInstrCountDirect(RotatedWithOnes) + 1;
    if (OnesCount < BestCount) {
      BestCount = OnesCount;
      BestRot = r;
      BestImm = RotatedWithOnes;
      BestMaskEnd = LS;
    }
  }

  // No rotation beat the direct sequence.
  if (BestRot == 0)
    return selectI64ImmDirect(CurDAG, dl, Imm);

  const auto MakeI32 = [CurDAG, dl](unsigned C) {
      return CurDAG->getTargetConstant(C, dl, MVT::i32);
  };

  // Build the rotated value, then undo the rotation (and clear the extra
  // leading ones, if any) with a single rldicr.
  SDValue Rotated = SDValue(selectI64ImmDirect(CurDAG, dl, BestImm), 0);
  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Rotated,
                                MakeI32(64 - BestRot), MakeI32(BestMaskEnd));
}
1031
1032
2.18k
// Check whether every use of N only consumes a truncated part of its value.
//
// Returns the widest number of bits any use keeps, or 0 as soon as one use is
// not a recognized truncating use (a TRUNCATE node, a non-64-bit STORE of N as
// operand 0, or one of the listed word/halfword/byte store machine opcodes
// with N as operand 0). A node with no uses also yields 0.
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned WidestUse = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    const bool IsMachineNode = UI->isMachineOpcode();
    const unsigned Opc =
      IsMachineNode ? UI->getMachineOpcode() : UI->getOpcode();

    // Number of bits of N this particular use keeps.
    unsigned UseBits;
    switch (Opc) {
    default:
      return 0;
    case ISD::TRUNCATE:
      // Reject a machine opcode whose number merely coincides with TRUNCATE.
      if (IsMachineNode)
        return 0;
      UseBits = UI->getValueType(0).getSizeInBits();
      break;
    case ISD::STORE: {
      // Reject a machine opcode whose number merely coincides with STORE.
      if (IsMachineNode)
        return 0;
      StoreSDNode *Store = cast<StoreSDNode>(*UI);
      const unsigned MemBits = Store->getMemoryVT().getSizeInBits();
      // Full-width stores do not truncate, and N must be operand 0.
      if (MemBits == 64 || UI.getOperandNo() != 0)
        return 0;
      UseBits = MemBits;
      break;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseBits = 32u;
      break;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseBits = 16u;
      break;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (UI.getOperandNo() != 0)
        return 0;
      UseBits = 8u;
      break;
    }
    WidestUse = std::max(WidestUse, UseBits);
  }
  return WidestUse;
}
1087
1088
// Select a 64-bit constant.
1089
2.18k
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1090
2.18k
  SDLoc dl(N);
1091
2.18k
1092
2.18k
  // Get 64 bit value.
1093
2.18k
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1094
2.18k
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1095
222
    uint64_t SextImm = SignExtend64(Imm, MinSize);
1096
222
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1097
222
    if (isInt<16>(SextImm))
1098
194
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1099
1.99k
  }
1100
1.99k
  return selectI64Imm(CurDAG, dl, Imm);
1101
1.99k
}
1102
1103
namespace {
1104
1105
class BitPermutationSelector {
1106
  struct ValueBit {
1107
    SDValue V;
1108
1109
    // The bit number in the value, using a convention where bit 0 is the
1110
    // lowest-order bit.
1111
    unsigned Idx;
1112
1113
    // ConstZero means a bit we need to mask off.
1114
    // Variable is a bit comes from an input variable.
1115
    // VariableKnownToBeZero is also a bit comes from an input variable,
1116
    // but it is known to be already zero. So we do not need to mask them.
1117
    enum Kind {
1118
      ConstZero,
1119
      Variable,
1120
      VariableKnownToBeZero
1121
    } K;
1122
1123
    ValueBit(SDValue V, unsigned I, Kind K = Variable)
1124
127k
      : V(V), Idx(I), K(K) {}
1125
    ValueBit(Kind K = Variable)
1126
281k
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1127
1128
78.6k
    bool isZero() const {
1129
78.6k
      return K == ConstZero || 
K == VariableKnownToBeZero35.9k
;
1130
78.6k
    }
1131
1132
239k
    bool hasValue() const {
1133
239k
      return K == Variable || 
K == VariableKnownToBeZero79.9k
;
1134
239k
    }
1135
1136
87.6k
    SDValue getValue() const {
1137
87.6k
      assert(hasValue() && "Cannot get the value of a constant bit");
1138
87.6k
      return V;
1139
87.6k
    }
1140
1141
50.4k
    unsigned getValueBitIndex() const {
1142
50.4k
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
1143
50.4k
      return Idx;
1144
50.4k
    }
1145
  };
1146
1147
  // A bit group has the same underlying value and the same rotate factor.
1148
  struct BitGroup {
1149
    SDValue V;
1150
    unsigned RLAmt;
1151
    unsigned StartIdx, EndIdx;
1152
1153
    // This rotation amount assumes that the lower 32 bits of the quantity are
1154
    // replicated in the high 32 bits by the rotation operator (which is done
1155
    // by rlwinm and friends in 64-bit mode).
1156
    bool Repl32;
1157
    // Did converting to Repl32 == true change the rotation factor? If it did,
1158
    // it decreased it by 32.
1159
    bool Repl32CR;
1160
    // Was this group coalesced after setting Repl32 to true?
1161
    bool Repl32Coalesced;
1162
1163
    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1164
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1165
2.29k
        Repl32Coalesced(false) {
1166
2.29k
      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1167
2.29k
                        << " [" << S << ", " << E << "]\n");
1168
2.29k
    }
1169
  };
1170
1171
  // Information on each (Value, RLAmt) pair (like the number of groups
1172
  // associated with each) used to choose the lowering method.
1173
  struct ValueRotInfo {
1174
    SDValue V;
1175
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
1176
    unsigned NumGroups = 0;
1177
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1178
    bool Repl32 = false;
1179
1180
1.94k
    ValueRotInfo() = default;
1181
1182
    // For sorting (in reverse order) by NumGroups, and then by
1183
    // FirstGroupStartIdx.
1184
134
    bool operator < (const ValueRotInfo &Other) const {
1185
134
      // We need to sort so that the non-Repl32 come first because, when we're
1186
134
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1187
134
      // masking operation.
1188
134
      if (Repl32 < Other.Repl32)
1189
9
        return true;
1190
125
      else if (Repl32 > Other.Repl32)
1191
12
        return false;
1192
113
      else if (NumGroups > Other.NumGroups)
1193
1
        return true;
1194
112
      else if (NumGroups < Other.NumGroups)
1195
3
        return false;
1196
109
      else if (RLAmt == 0 && 
Other.RLAmt != 029
)
1197
17
        return true;
1198
92
      else if (RLAmt != 0 && 
Other.RLAmt == 080
)
1199
18
        return false;
1200
74
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1201
30
        return true;
1202
44
      return false;
1203
44
    }
1204
  };
1205
1206
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1207
  using ValueBitsMemoizer =
1208
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1209
  ValueBitsMemoizer Memoizer;
1210
1211
  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1212
  // The bool is true if something interesting was deduced, otherwise if we're
1213
  // providing only a generic representation of V (or something else likewise
1214
  // uninteresting for instruction selection) through the SmallVector.
1215
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1216
4.96k
                                                            unsigned NumBits) {
1217
4.96k
    auto &ValueEntry = Memoizer[V];
1218
4.96k
    if (ValueEntry)
1219
58
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
1220
4.91k
    ValueEntry.reset(new ValueBitsMemoizedValue());
1221
4.91k
    bool &Interesting = ValueEntry->first;
1222
4.91k
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1223
4.91k
    Bits.resize(NumBits);
1224
4.91k
1225
4.91k
    switch (V.getOpcode()) {
1226
4.91k
    
default: break1.88k
;
1227
4.91k
    case ISD::ROTL:
1228
47
      if (isa<ConstantSDNode>(V.getOperand(1))) {
1229
23
        unsigned RotAmt = V.getConstantOperandVal(1);
1230
23
1231
23
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1232
23
1233
855
        for (unsigned i = 0; i < NumBits; 
++i832
)
1234
832
          Bits[i] = LHSBits[i < RotAmt ? 
i + (NumBits - RotAmt)339
:
i - RotAmt493
];
1235
23
1236
23
        return std::make_pair(Interesting = true, &Bits);
1237
23
      }
1238
24
      break;
1239
696
    case ISD::SHL:
1240
696
      if (isa<ConstantSDNode>(V.getOperand(1))) {
1241
653
        unsigned ShiftAmt = V.getConstantOperandVal(1);
1242
653
1243
653
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1244
653
1245
31.5k
        for (unsigned i = ShiftAmt; i < NumBits; 
++i30.9k
)
1246
30.9k
          Bits[i] = LHSBits[i - ShiftAmt];
1247
653
1248
4.79k
        for (unsigned i = 0; i < ShiftAmt; 
++i4.13k
)
1249
4.13k
          Bits[i] = ValueBit(ValueBit::ConstZero);
1250
653
1251
653
        return std::make_pair(Interesting = true, &Bits);
1252
653
      }
1253
43
      break;
1254
432
    case ISD::SRL:
1255
432
      if (isa<ConstantSDNode>(V.getOperand(1))) {
1256
390
        unsigned ShiftAmt = V.getConstantOperandVal(1);
1257
390
1258
390
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1259
390
1260
9.09k
        for (unsigned i = 0; i < NumBits - ShiftAmt; 
++i8.70k
)
1261
8.70k
          Bits[i] = LHSBits[i + ShiftAmt];
1262
390
1263
11.9k
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; 
++i11.5k
)
1264
11.5k
          Bits[i] = ValueBit(ValueBit::ConstZero);
1265
390
1266
390
        return std::make_pair(Interesting = true, &Bits);
1267
390
      }
1268
42
      break;
1269
914
    case ISD::AND:
1270
914
      if (isa<ConstantSDNode>(V.getOperand(1))) {
1271
853
        uint64_t Mask = V.getConstantOperandVal(1);
1272
853
1273
853
        const SmallVector<ValueBit, 64> *LHSBits;
1274
853
        // Mark this as interesting, only if the LHS was also interesting. This
1275
853
        // prevents the overall procedure from matching a single immediate 'and'
1276
853
        // (which is non-optimal because such an and might be folded with other
1277
853
        // things if we don't select it here).
1278
853
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1279
853
1280
37.7k
        for (unsigned i = 0; i < NumBits; 
++i36.9k
)
1281
36.9k
          if (((Mask >> i) & 1) == 1)
1282
9.16k
            Bits[i] = (*LHSBits)[i];
1283
27.7k
          else {
1284
27.7k
            // AND instruction masks this bit. If the input is already zero,
1285
27.7k
            // we have nothing to do here. Otherwise, make the bit ConstZero.
1286
27.7k
            if ((*LHSBits)[i].isZero())
1287
1.96k
              Bits[i] = (*LHSBits)[i];
1288
25.8k
            else
1289
25.8k
              Bits[i] = ValueBit(ValueBit::ConstZero);
1290
27.7k
          }
1291
853
1292
853
        return std::make_pair(Interesting, &Bits);
1293
853
      }
1294
61
      break;
1295
386
    case ISD::OR: {
1296
386
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1297
386
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1298
386
1299
386
      bool AllDisjoint = true;
1300
386
      SDValue LastVal = SDValue();
1301
386
      unsigned LastIdx = 0;
1302
6.37k
      for (unsigned i = 0; i < NumBits; 
++i5.98k
) {
1303
6.25k
        if (LHSBits[i].isZero() && 
RHSBits[i].isZero()3.87k
) {
1304
1.57k
          // If both inputs are known to be zero and one is ConstZero and
1305
1.57k
          // another is VariableKnownToBeZero, we can select whichever
1306
1.57k
          // we like. To minimize the number of bit groups, we select
1307
1.57k
          // VariableKnownToBeZero if this bit is the next bit of the same
1308
1.57k
          // input variable from the previous bit. Otherwise, we select
1309
1.57k
          // ConstZero.
1310
1.57k
          if (LHSBits[i].hasValue() && 
LHSBits[i].getValue() == LastVal63
&&
1311
1.57k
              
LHSBits[i].getValueBitIndex() == LastIdx + 11
)
1312
0
            Bits[i] = LHSBits[i];
1313
1.57k
          else if (RHSBits[i].hasValue() && 
RHSBits[i].getValue() == LastVal40
&&
1314
1.57k
                   
RHSBits[i].getValueBitIndex() == LastIdx + 124
)
1315
24
            Bits[i] = RHSBits[i];
1316
1.55k
          else
1317
1.55k
            Bits[i] = ValueBit(ValueBit::ConstZero);
1318
1.57k
        }
1319
4.68k
        else if (LHSBits[i].isZero())
1320
2.30k
          Bits[i] = RHSBits[i];
1321
2.38k
        else if (RHSBits[i].isZero())
1322
2.10k
          Bits[i] = LHSBits[i];
1323
272
        else {
1324
272
          AllDisjoint = false;
1325
272
          break;
1326
272
        }
1327
5.98k
        // We remember the value and bit index of this bit.
1328
5.98k
        if (Bits[i].hasValue()) {
1329
4.43k
          LastVal = Bits[i].getValue();
1330
4.43k
          LastIdx = Bits[i].getValueBitIndex();
1331
4.43k
        }
1332
1.55k
        else {
1333
1.55k
          if (LastVal) 
LastVal = SDValue()41
;
1334
1.55k
          LastIdx = 0;
1335
1.55k
        }
1336
5.98k
      }
1337
386
1338
386
      if (!AllDisjoint)
1339
272
        break;
1340
114
1341
114
      return std::make_pair(Interesting = true, &Bits);
1342
114
    }
1343
114
    case ISD::ZERO_EXTEND: {
1344
36
      // We support only the case with zero extension from i32 to i64 so far.
1345
36
      if (V.getValueType() != MVT::i64 ||
1346
36
          
V.getOperand(0).getValueType() != MVT::i3235
)
1347
1
        break;
1348
35
1349
35
      const SmallVector<ValueBit, 64> *LHSBits;
1350
35
      const unsigned NumOperandBits = 32;
1351
35
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1352
35
                                                    NumOperandBits);
1353
35
1354
1.15k
      for (unsigned i = 0; i < NumOperandBits; 
++i1.12k
)
1355
1.12k
        Bits[i] = (*LHSBits)[i];
1356
35
1357
1.15k
      for (unsigned i = NumOperandBits; i < NumBits; 
++i1.12k
)
1358
1.12k
        Bits[i] = ValueBit(ValueBit::ConstZero);
1359
35
1360
35
      return std::make_pair(Interesting, &Bits);
1361
35
    }
1362
237
    case ISD::TRUNCATE: {
1363
237
      EVT FromType = V.getOperand(0).getValueType();
1364
237
      EVT ToType = V.getValueType();
1365
237
      // We support only the case with truncate from i64 to i32.
1366
237
      if (FromType != MVT::i64 || ToType != MVT::i32)
1367
0
        break;
1368
237
      const unsigned NumAllBits = FromType.getSizeInBits();
1369
237
      SmallVector<ValueBit, 64> *InBits;
1370
237
      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1371
237
                                                    NumAllBits);
1372
237
      const unsigned NumValidBits = ToType.getSizeInBits();
1373
237
1374
237
      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1375
237
      // So, we cannot include this truncate.
1376
237
      bool UseUpper32bit = false;
1377
7.78k
      for (unsigned i = 0; i < NumValidBits; 
++i7.55k
)
1378
7.55k
        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1379
1
          UseUpper32bit = true;
1380
1
          break;
1381
1
        }
1382
237
      if (UseUpper32bit)
1383
1
        break;
1384
236
1385
7.78k
      
for (unsigned i = 0; 236
i < NumValidBits;
++i7.55k
)
1386
7.55k
        Bits[i] = (*InBits)[i];
1387
236
1388
236
      return std::make_pair(Interesting, &Bits);
1389
236
    }
1390
236
    case ISD::AssertZext: {
1391
58
      // For AssertZext, we look through the operand and
1392
58
      // mark the bits known to be zero.
1393
58
      const SmallVector<ValueBit, 64> *LHSBits;
1394
58
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1395
58
                                                    NumBits);
1396
58
1397
58
      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1398
58
      const unsigned NumValidBits = FromType.getSizeInBits();
1399
1.46k
      for (unsigned i = 0; i < NumValidBits; 
++i1.41k
)
1400
1.41k
        Bits[i] = (*LHSBits)[i];
1401
58
1402
58
      // These bits are known to be zero.
1403
2.20k
      for (unsigned i = NumValidBits; i < NumBits; 
++i2.14k
)
1404
2.14k
        Bits[i] = ValueBit((*LHSBits)[i].getValue(),
1405
2.14k
                           (*LHSBits)[i].getValueBitIndex(),
1406
2.14k
                           ValueBit::VariableKnownToBeZero);
1407
58
1408
58
      return std::make_pair(Interesting, &Bits);
1409
236
    }
1410
236
    case ISD::LOAD:
1411
222
      LoadSDNode *LD = cast<LoadSDNode>(V);
1412
222
      if (ISD::isZEXTLoad(V.getNode()) && 
V.getResNo() == 030
) {
1413
30
        EVT VT = LD->getMemoryVT();
1414
30
        const unsigned NumValidBits = VT.getSizeInBits();
1415
30
1416
470
        for (unsigned i = 0; i < NumValidBits; 
++i440
)
1417
440
          Bits[i] = ValueBit(V, i);
1418
30
1419
30
        // These bits are known to be zero.
1420
806
        for (unsigned i = NumValidBits; i < NumBits; 
++i776
)
1421
776
          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1422
30
1423
30
        // Zero-extending load itself cannot be optimized. So, it is not
1424
30
        // interesting by itself though it gives useful information.
1425
30
        return std::make_pair(Interesting = false, &Bits);
1426
30
      }
1427
192
      break;
1428
2.51k
    }
1429
2.51k
1430
126k
    
for (unsigned i = 0; 2.51k
i < NumBits;
++i124k
)
1431
124k
      Bits[i] = ValueBit(V, i);
1432
2.51k
1433
2.51k
    return std::make_pair(Interesting = false, &Bits);
1434
2.51k
  }
1435
1436
  // For each value (except the constant ones), compute the left-rotate amount
1437
  // to get it from its original to final position.
1438
957
  void computeRotationAmounts() {
1439
957
    NeedMask = false;
1440
957
    RLAmt.resize(Bits.size());
1441
51.7k
    for (unsigned i = 0; i < Bits.size(); 
++i50.7k
)
1442
50.7k
      if (Bits[i].hasValue()) {
1443
34.9k
        unsigned VBI = Bits[i].getValueBitIndex();
1444
34.9k
        if (i >= VBI)
1445
29.3k
          RLAmt[i] = i - VBI;
1446
5.61k
        else
1447
5.61k
          RLAmt[i] = Bits.size() - (VBI - i);
1448
34.9k
      } else 
if (15.7k
Bits[i].isZero()15.7k
) {
1449
15.7k
        NeedMask = true;
1450
15.7k
        RLAmt[i] = UINT32_MAX;
1451
15.7k
      } else {
1452
0
        llvm_unreachable("Unknown value bit type");
1453
0
      }
1454
957
  }
1455
1456
  // Collect groups of consecutive bits with the same underlying value and
1457
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1458
  // they break up groups.
1459
1.82k
  void collectBitGroups(bool LateMask) {
1460
1.82k
    BitGroups.clear();
1461
1.82k
1462
1.82k
    unsigned LastRLAmt = RLAmt[0];
1463
1.82k
    SDValue LastValue = Bits[0].hasValue() ? 
Bits[0].getValue()735
:
SDValue()1.09k
;
1464
1.82k
    unsigned LastGroupStartIdx = 0;
1465
1.82k
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1466
98.0k
    for (unsigned i = 1; i < Bits.size(); 
++i96.1k
) {
1467
96.1k
      unsigned ThisRLAmt = RLAmt[i];
1468
96.1k
      SDValue ThisValue = Bits[i].hasValue() ? 
Bits[i].getValue()65.7k
:
SDValue()30.4k
;
1469
96.1k
      if (LateMask && 
!ThisValue46.3k
) {
1470
15.2k
        ThisValue = LastValue;
1471
15.2k
        ThisRLAmt = LastRLAmt;
1472
15.2k
        // If we're doing late masking, then the first bit group always starts
1473
15.2k
        // at zero (even if the first bits were zero).
1474
15.2k
        if (BitGroups.empty())
1475
14.8k
          LastGroupStartIdx = 0;
1476
15.2k
      }
1477
96.1k
1478
96.1k
      // If this bit is known to be zero and the current group is a bit group
1479
96.1k
      // of zeros, we do not need to terminate the current bit group even the
1480
96.1k
      // Value or RLAmt does not match here. Instead, we terminate this group
1481
96.1k
      // when the first non-zero bit appears later.
1482
96.1k
      if (IsGroupOfZeros && 
Bits[i].isZero()17.9k
)
1483
16.4k
        continue;
1484
79.6k
1485
79.6k
      // If this bit has the same underlying value and the same rotate factor as
1486
79.6k
      // the last one, then they're part of the same group.
1487
79.6k
      if (ThisRLAmt == LastRLAmt && 
ThisValue == LastValue77.4k
)
1488
77.4k
        // We cannot continue the current group if this bits is not known to
1489
77.4k
        // be zero in a bit group of zeros.
1490
77.4k
        if (!(IsGroupOfZeros && 
ThisValue0
&&
!Bits[i].isZero()0
))
1491
77.4k
          continue;
1492
2.24k
1493
2.24k
      if (LastValue.getNode())
1494
829
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1495
829
                                     i-1));
1496
2.24k
      LastRLAmt = ThisRLAmt;
1497
2.24k
      LastValue = ThisValue;
1498
2.24k
      LastGroupStartIdx = i;
1499
2.24k
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1500
2.24k
    }
1501
1.82k
    if (LastValue.getNode())
1502
1.46k
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1503
1.46k
                                   Bits.size()-1));
1504
1.82k
1505
1.82k
    if (BitGroups.empty())
1506
0
      return;
1507
1.82k
1508
1.82k
    // We might be able to combine the first and last groups.
1509
1.82k
    if (BitGroups.size() > 1) {
1510
128
      // If the first and last groups are the same, then remove the first group
1511
128
      // in favor of the last group, making the ending index of the last group
1512
128
      // equal to the ending index of the to-be-removed first group.
1513
128
      if (BitGroups[0].StartIdx == 0 &&
1514
128
          
BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1114
&&
1515
128
          
BitGroups[0].V == BitGroups[BitGroups.size()-1].V82
&&
1516
128
          
BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt33
) {
1517
13
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1518
13
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1519
13
        BitGroups.erase(BitGroups.begin());
1520
13
      }
1521
128
    }
1522
1.82k
  }
1523
1524
  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1525
  // associated with each. If the group counts are equal, we prefer a group
1526
  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1527
  // instruction. If there is a degeneracy, pick the one that occurs
1528
  // first (in the final value).
1529
1.82k
  void collectValueRotInfo() {
1530
1.82k
    ValueRots.clear();
1531
1.82k
1532
2.27k
    for (auto &BG : BitGroups) {
1533
2.27k
      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 
64156
:
02.11k
);
1534
2.27k
      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1535
2.27k
      VRI.V = BG.V;
1536
2.27k
      VRI.RLAmt = BG.RLAmt;
1537
2.27k
      VRI.Repl32 = BG.Repl32;
1538
2.27k
      VRI.NumGroups += 1;
1539
2.27k
      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1540
2.27k
    }
1541
1.82k
1542
1.82k
    // Now that we've collected the various ValueRotInfo instances, we need to
1543
1.82k
    // sort them.
1544
1.82k
    ValueRotsVec.clear();
1545
1.94k
    for (auto &I : ValueRots) {
1546
1.94k
      ValueRotsVec.push_back(I.second);
1547
1.94k
    }
1548
1.82k
    llvm::sort(ValueRotsVec);
1549
1.82k
  }
1550
1551
  // In 64-bit mode, rlwinm and friends have a rotation operator that
1552
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1553
  // indices of these instructions can only be in the lower 32 bits, so they
1554
  // can only represent some 64-bit bit groups. However, when they can be used,
1555
  // the 32-bit replication can be used to represent, as a single bit group,
1556
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1557
  // groups when possible. The function returns nothing; BitGroups is updated
1558
  // in place.
1559
1.23k
  void assignRepl32BitGroups() {
1560
1.23k
    // If we have bits like this:
1561
1.23k
    //
1562
1.23k
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
1563
1.23k
    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
1564
1.23k
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
1565
1.23k
    //
1566
1.23k
    // But, making use of a 32-bit operation that replicates the low-order 32
1567
1.23k
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1568
1.23k
    // of 8.
1569
1.23k
1570
1.23k
    auto IsAllLow32 = [this](BitGroup & BG) {
1571
347
      if (BG.StartIdx <= BG.EndIdx) {
1572
1.47k
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; 
++i1.13k
) {
1573
1.31k
          if (!Bits[i].hasValue())
1574
28
            continue;
1575
1.28k
          if (Bits[i].getValueBitIndex() >= 32)
1576
183
            return false;
1577
1.28k
        }
1578
347
      } else {
1579
0
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1580
0
          if (!Bits[i].hasValue())
1581
0
            continue;
1582
0
          if (Bits[i].getValueBitIndex() >= 32)
1583
0
            return false;
1584
0
        }
1585
0
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1586
0
          if (!Bits[i].hasValue())
1587
0
            continue;
1588
0
          if (Bits[i].getValueBitIndex() >= 32)
1589
0
            return false;
1590
0
        }
1591
0
      }
1592
347
1593
347
      
return true164
;
1594
347
    };
1595
1.23k
1596
1.52k
    for (auto &BG : BitGroups) {
1597
1.52k
      // If this bit group has RLAmt of 0 and will not be merged with
1598
1.52k
      // another bit group, we don't benefit from Repl32. We don't mark
1599
1.52k
      // such group to give more freedom for later instruction selection.
1600
1.52k
      if (BG.RLAmt == 0) {
1601
17
        auto PotentiallyMerged = [this](BitGroup & BG) {
1602
17
          for (auto &BG2 : BitGroups)
1603
36
            if (&BG != &BG2 && 
BG.V == BG2.V20
&&
1604
36
                
(10
BG2.RLAmt == 010
||
BG2.RLAmt == 328
))
1605
4
              return true;
1606
17
          
return false13
;
1607
17
        };
1608
17
        if (!PotentiallyMerged(BG))
1609
13
          continue;
1610
1.51k
      }
1611
1.51k
      if (BG.StartIdx < 32 && 
BG.EndIdx < 321.34k
) {
1612
344
        if (IsAllLow32(BG)) {
1613
161
          if (BG.RLAmt >= 32) {
1614
136
            BG.RLAmt -= 32;
1615
136
            BG.Repl32CR = true;
1616
136
          }
1617
161
1618
161
          BG.Repl32 = true;
1619
161
1620
161
          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1621
161
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
1622
161
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1623
161
        }
1624
344
      }
1625
1.51k
    }
1626
1.23k
1627
1.23k
    // Now walk through the bit groups, consolidating where possible.
1628
2.75k
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1629
1.52k
      // We might want to remove this bit group by merging it with the previous
1630
1.52k
      // group (which might be the ending group).
1631
1.52k
      auto IP = (I == BitGroups.begin()) ?
1632
1.23k
                std::prev(BitGroups.end()) : 
std::prev(I)289
;
1633
1.52k
      if (I->Repl32 && 
IP->Repl32161
&&
I->V == IP->V137
&&
I->RLAmt == IP->RLAmt135
&&
1634
1.52k
          
I->StartIdx == (IP->EndIdx + 1) % 64133
&&
I != IP4
) {
1635
4
1636
4
        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1637
4
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
1638
4
                          << I->StartIdx << ", " << I->EndIdx
1639
4
                          << "] with group with range [" << IP->StartIdx << ", "
1640
4
                          << IP->EndIdx << "]\n");
1641
4
1642
4
        IP->EndIdx = I->EndIdx;
1643
4
        IP->Repl32CR = IP->Repl32CR || 
I->Repl32CR0
;
1644
4
        IP->Repl32Coalesced = true;
1645
4
        I = BitGroups.erase(I);
1646
4
        continue;
1647
1.52k
      } else {
1648
1.52k
        // There is a special case worth handling: If there is a single group
1649
1.52k
        // covering the entire upper 32 bits, and it can be merged with both
1650
1.52k
        // the next and previous groups (which might be the same group), then
1651
1.52k
        // do so. If it is the same group (so there will be only one group in
1652
1.52k
        // total), then we need to reverse the order of the range so that it
1653
1.52k
        // covers the entire 64 bits.
1654
1.52k
        if (I->StartIdx == 32 && 
I->EndIdx == 6329
) {
1655
15
          assert(std::next(I) == BitGroups.end() &&
1656
15
                 "bit group ends at index 63 but there is another?");
1657
15
          auto IN = BitGroups.begin();
1658
15
1659
15
          if (IP->Repl32 && 
IN->Repl325
&&
I->V == IP->V5
&&
I->V == IN->V5
&&
1660
15
              
(I->RLAmt % 32) == IP->RLAmt5
&&
(I->RLAmt % 32) == IN->RLAmt3
&&
1661
15
              
IP->EndIdx == 313
&&
IN->StartIdx == 03
&&
I != IP3
&&
1662
15
              
IsAllLow32(*I)3
) {
1663
3
1664
3
            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
1665
3
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
1666
3
                              << ", " << I->EndIdx
1667
3
                              << "] with 32-bit replicated groups with ranges ["
1668
3
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
1669
3
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");
1670
3
1671
3
            if (IP == IN) {
1672
2
              // There is only one other group; change it to cover the whole
1673
2
              // range (backward, so that it can still be Repl32 but cover the
1674
2
              // whole 64-bit range).
1675
2
              IP->StartIdx = 31;
1676
2
              IP->EndIdx = 30;
1677
2
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1678
2
              IP->Repl32Coalesced = true;
1679
2
              I = BitGroups.erase(I);
1680
2
            } else {
1681
1
              // There are two separate groups, one before this group and one
1682
1
              // after us (at the beginning). We're going to remove this group,
1683
1
              // but also the group at the very beginning.
1684
1
              IP->EndIdx = IN->EndIdx;
1685
1
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1686
1
              IP->Repl32Coalesced = true;
1687
1
              I = BitGroups.erase(I);
1688
1
              BitGroups.erase(BitGroups.begin());
1689
1
            }
1690
3
1691
3
            // This must be the last group in the vector (and we might have
1692
3
            // just invalidated the iterator above), so break here.
1693
3
            break;
1694
3
          }
1695
1.51k
        }
1696
1.52k
      }
1697
1.51k
1698
1.51k
      ++I;
1699
1.51k
    }
1700
1.23k
  }
1701
1702
5.21k
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    // Wrap Imm in an i32 target constant node of the current DAG.
    SDValue ImmNode = CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    return ImmNode;
  }
1705
1706
870
  uint64_t getZerosMask() {
1707
870
    uint64_t Mask = 0;
1708
48.1k
    for (unsigned i = 0; i < Bits.size(); 
++i47.2k
) {
1709
47.2k
      if (Bits[i].hasValue())
1710
31.4k
        continue;
1711
15.7k
      Mask |= (UINT64_C(1) << i);
1712
15.7k
    }
1713
870
1714
870
    return ~Mask;
1715
870
  }
1716
1717
  // This method extends an input value to 64 bit if input is 32-bit integer.
1718
  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1719
  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1720
  // In such case, we extend it to 64 bit to be consistent with other values.
1721
2.07k
  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
    // Values that are already 64 bits wide are passed through untouched.
    if (V.getValueSizeInBits() == 64)
      return V;

    // Anything else must be a 32-bit value; state the expectation so that an
    // assertion failure explains itself.
    assert(V.getValueSizeInBits() == 32 &&
           "Only 32-bit values can be extended to 64 bits here");

    // Insert V into the sub_32 subregister of an IMPLICIT_DEF i64 value.
    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                                   MVT::i64), 0);
    SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                                                    MVT::i64, ImDef, V,
                                                    SubRegIdx), 0);
    return ExtVal;
  }
1734
1735
659
  // Counterpart of ExtendToInt64: returns V as a 32-bit value. A value that
  // is already 32 bits wide is returned unchanged; a 64-bit value is narrowed
  // by extracting its sub_32 subregister.
  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 32)
      return V;

    // Anything else must be a 64-bit value; state the expectation so that an
    // assertion failure explains itself.
    assert(V.getValueSizeInBits() == 64 &&
           "Only 64-bit values can be truncated to 32 bits here");

    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
                                                    MVT::i32, V, SubRegIdx), 0);
    return SubVal;
  }
1745
1746
  // Depending on the number of groups for a particular value, it might be
1747
  // better to rotate, mask explicitly (using andi/andis), and then or the
1748
  // result. Select this part of the result first.
1749
591
  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1750
591
    if (BPermRewriterNoMasking)
1751
0
      return;
1752
591
1753
655
    
for (ValueRotInfo &VRI : ValueRotsVec)591
{
1754
655
      unsigned Mask = 0;
1755
21.6k
      for (unsigned i = 0; i < Bits.size(); 
++i20.9k
) {
1756
20.9k
        if (!Bits[i].hasValue() || 
Bits[i].getValue() != VRI.V14.5k
)
1757
7.86k
          continue;
1758
13.0k
        if (RLAmt[i] != VRI.RLAmt)
1759
254
          continue;
1760
12.8k
        Mask |= (1u << i);
1761
12.8k
      }
1762
655
1763
655
      // Compute the masks for andi/andis that would be necessary.
1764
655
      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1765
655
      assert((ANDIMask != 0 || ANDISMask != 0) &&
1766
655
             "No set bits in mask for value bit groups");
1767
655
      bool NeedsRotate = VRI.RLAmt != 0;
1768
655
1769
655
      // We're trying to minimize the number of instructions. If we have one
1770
655
      // group, using one of andi/andis can break even.  If we have three
1771
655
      // groups, we can use both andi and andis and break even (to use both
1772
655
      // andi and andis we also need to or the results together). We need four
1773
655
      // groups if we also need to rotate. To use andi/andis we need to do more
1774
655
      // than break even because rotate-and-mask instructions tend to be easier
1775
655
      // to schedule.
1776
655
1777
655
      // FIXME: We've biased here against using andi/andis, which is right for
1778
655
      // POWER cores, but not optimal everywhere. For example, on the A2,
1779
655
      // andi/andis have single-cycle latency whereas the rotate-and-mask
1780
655
      // instructions take two cycles, and it would be better to bias toward
1781
655
      // andi/andis in break-even cases.
1782
655
1783
655
      unsigned NumAndInsts = (unsigned) NeedsRotate +
1784
655
                             (unsigned) (ANDIMask != 0) +
1785
655
                             (unsigned) (ANDISMask != 0) +
1786
655
                             (unsigned) (ANDIMask != 0 && 
ANDISMask != 0596
) +
1787
655
                             (unsigned) (bool) Res;
1788
655
1789
655
      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
1790
655
                        << " RL: " << VRI.RLAmt << ":"
1791
655
                        << "\n\t\t\tisel using masking: " << NumAndInsts
1792
655
                        << " using rotates: " << VRI.NumGroups << "\n");
1793
655
1794
655
      if (NumAndInsts >= VRI.NumGroups)
1795
641
        continue;
1796
14
1797
14
      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1798
14
1799
14
      if (InstCnt) *InstCnt += NumAndInsts;
1800
14
1801
14
      SDValue VRot;
1802
14
      if (VRI.RLAmt) {
1803
12
        SDValue Ops[] =
1804
12
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1805
12
            getI32Imm(0, dl), getI32Imm(31, dl) };
1806
12
        VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1807
12
                                              Ops), 0);
1808
12
      } else {
1809
2
        VRot = TruncateToInt32(VRI.V, dl);
1810
2
      }
1811
14
1812
14
      SDValue ANDIVal, ANDISVal;
1813
14
      if (ANDIMask != 0)
1814
14
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1815
14
                            VRot, getI32Imm(ANDIMask, dl)), 0);
1816
14
      if (ANDISMask != 0)
1817
8
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1818
8
                             VRot, getI32Imm(ANDISMask, dl)), 0);
1819
14
1820
14
      SDValue TotalVal;
1821
14
      if (!ANDIVal)
1822
0
        TotalVal = ANDISVal;
1823
14
      else if (!ANDISVal)
1824
6
        TotalVal = ANDIVal;
1825
8
      else
1826
8
        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1827
8
                             ANDIVal, ANDISVal), 0);
1828
14
1829
14
      if (!Res)
1830
14
        Res = TotalVal;
1831
0
      else
1832
0
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1833
0
                        Res, TotalVal), 0);
1834
14
1835
14
      // Now, remove all groups with this underlying value and rotation
1836
14
      // factor.
1837
110
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1838
110
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1839
110
      });
1840
14
    }
1841
591
  }
1842
1843
  // Instruction selection for the 32-bit case.
1844
591
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    // Builds the machine nodes for the 32-bit permutation and returns the
    // node of the final result. When InstCnt is non-null it receives the
    // number of instructions counted along the way.
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt)
      *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    const bool CanStartWithRotate = !NeedMask || LateMask;
    if (CanStartWithRotate && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      Res = TruncateToInt32(VRI.V, dl);
      if (VRI.RLAmt) {
        if (InstCnt)
          *InstCnt += 1;
        // Rotate with the full 0..31 mask.
        SDValue Ops[] =
          { Res, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
            getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    // Each remaining group costs one instruction.
    if (InstCnt)
      *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      // The instruction's mask bounds are the group bounds mirrored within
      // the value width.
      SDValue Src = TruncateToInt32(BG.V, dl);
      SDValue Rot = getI32Imm(BG.RLAmt, dl);
      SDValue MaskBegin = getI32Imm(Bits.size() - BG.EndIdx - 1, dl);
      SDValue MaskEnd = getI32Imm(Bits.size() - BG.StartIdx - 1, dl);

      if (Res) {
        // Insert the group into the result built so far.
        SDValue Ops[] = { Res, Src, Rot, MaskBegin, MaskEnd };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      } else {
        // Nothing to insert into yet: start the result with a rotate-and-mask.
        SDValue Ops[] = { Src, Rot, MaskBegin, MaskEnd };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Apply the mask returned by getZerosMask() to the assembled value,
      // halfword by halfword.
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX);
      unsigned ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");
      const bool UsesLowHalf = ANDIMask != 0;
      const bool UsesHighHalf = ANDISMask != 0;

      if (InstCnt) {
        // One instruction per halfword in use, plus an or when both are.
        unsigned MaskInsts = 0;
        if (UsesLowHalf)
          ++MaskInsts;
        if (UsesHighHalf)
          ++MaskInsts;
        if (UsesLowHalf && UsesHighHalf)
          ++MaskInsts;
        *InstCnt += MaskInsts;
      }

      SDValue ANDIVal, ANDISVal;
      if (UsesLowHalf)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                            Res, getI32Imm(ANDIMask, dl)), 0);
      if (UsesHighHalf)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                             Res, getI32Imm(ANDISMask, dl)), 0);

      if (ANDIVal && ANDISVal)
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                        ANDIVal, ANDISVal), 0);
      else
        Res = ANDIVal ? ANDIVal : ANDISVal;
    }

    return Res.getNode();
  }
1924
1925
  // Number of instructions (1 or 2) counted for a 64-bit rotate-and-mask
  // (IsIns == false) or rotate-mask-and-insert (IsIns == true) with the given
  // rotation amount and mask bounds.
  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                                unsigned MaskStart, unsigned MaskEnd,
                                bool IsIns) {
    // A Repl32 group is always counted as a single instruction.
    if (Repl32)
      return 1;

    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    const unsigned InstMaskStart = 63 - MaskEnd;
    const unsigned InstMaskEnd = 63 - MaskStart;

    // Without insertion, a mask that touches either end of the register
    // counts as one instruction.
    const bool TouchesAnEnd = InstMaskEnd == 63 || InstMaskStart == 0;
    if (!IsIns && TouchesAnEnd)
      return 1;

    // With or without insertion, so does a mask whose end lines up with the
    // rotation amount; everything else counts as two.
    return InstMaskEnd == 63 - RLAmt ? 1 : 2;
  }
1942
1943
  // For 64-bit values, not all combinations of rotates and masks are
1944
  // available. Produce one if it is available.
1945
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 63 - MaskEnd;
    unsigned InstMaskEnd = 63 - MaskStart;

    if (InstCnt)
      *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // Pick the single instruction that can express this rotate-and-mask, if
    // there is one, together with the one mask bound it takes as an operand.
    bool HaveSingleInst = true;
    unsigned Opcode = 0;
    unsigned MaskImm = 0;
    if (InstMaskEnd == 63) {
      Opcode = PPC::RLDICL;
      MaskImm = InstMaskStart;
    } else if (InstMaskStart == 0) {
      Opcode = PPC::RLDICR;
      MaskImm = InstMaskEnd;
    } else if (InstMaskEnd == 63 - RLAmt) {
      Opcode = PPC::RLDIC;
      MaskImm = InstMaskStart;
    } else {
      HaveSingleInst = false;
    }

    if (HaveSingleInst) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(MaskImm, dl) };
      return SDValue(CurDAG->getMachineNode(Opcode, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt)
      *InstCnt += 1;

    // The rotation amount for the second instruction must be MaskStart.
    const unsigned SecondRot = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    const unsigned FirstRot = (64 + RLAmt - SecondRot) % 64;
    if (FirstRot)
      V = SelectRotMask64(V, dl, FirstRot, false, 0, 63);
    return SelectRotMask64(V, dl, SecondRot, false, MaskStart, MaskEnd);
  }
2006
2007
  // For 64-bit values, not all combinations of rotates and masks are
2008
  // available. Produce a rotate-mask-and-insert if one is available.
2009
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 63 - MaskEnd;
    unsigned InstMaskEnd = 63 - MaskStart;

    if (InstCnt)
      *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // A single rldimi is emitted when the mask end lines up with the rotation
    // amount.
    const bool FitsSingleInsert = InstMaskEnd == 63 - RLAmt;
    if (FitsSingleInsert) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt)
      *InstCnt += 1;

    // The rotation amount for the second instruction must be MaskStart.
    const unsigned SecondRot = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    const unsigned FirstRot = (64 + RLAmt - SecondRot) % 64;
    if (FirstRot)
      V = SelectRotMask64(V, dl, FirstRot, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, SecondRot, false, MaskStart,
                              MaskEnd);
  }
2056
2057
1.23k
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2058
1.23k
    if (BPermRewriterNoMasking)
2059
0
      return;
2060
1.23k
2061
1.23k
    // The idea here is the same as in the 32-bit version, but with additional
2062
1.23k
    // complications from the fact that Repl32 might be true. Because we
2063
1.23k
    // aggressively convert bit groups to Repl32 form (which, for small
2064
1.23k
    // rotation factors, involves no other change), and then coalesce, it might
2065
1.23k
    // be the case that a single 64-bit masking operation could handle both
2066
1.23k
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2067
1.23k
    // form allowed coalescing, then we must use a 32-bit rotation in order to
2068
1.23k
    // completely capture the new combined bit group.
2069
1.23k
2070
1.29k
    
for (ValueRotInfo &VRI : ValueRotsVec)1.23k
{
2071
1.29k
      uint64_t Mask = 0;
2072
1.29k
2073
1.29k
      // We need to add to the mask all bits from the associated bit groups.
2074
1.29k
      // If Repl32 is false, we need to add bits from bit groups that have
2075
1.29k
      // Repl32 true, but are trivially convertable to Repl32 false. Such a
2076
1.29k
      // group is trivially convertable if it overlaps only with the lower 32
2077
1.29k
      // bits, and the group has not been coalesced.
2078
3.61k
      auto MatchingBG = [VRI](const BitGroup &BG) {
2079
3.61k
        if (VRI.V != BG.V)
2080
60
          return false;
2081
3.55k
2082
3.55k
        unsigned EffRLAmt = BG.RLAmt;
2083
3.55k
        if (!VRI.Repl32 && 
BG.Repl323.43k
) {
2084
328
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2085
328
              !BG.Repl32Coalesced) {
2086
322
            if (BG.Repl32CR)
2087
310
              EffRLAmt += 32;
2088
322
          } else {
2089
6
            return false;
2090
6
          }
2091
3.22k
        } else if (VRI.Repl32 != BG.Repl32) {
2092
10
          return false;
2093
10
        }
2094
3.53k
2095
3.53k
        return VRI.RLAmt == EffRLAmt;
2096
3.53k
      };
2097
1.29k
2098
1.68k
      for (auto &BG : BitGroups) {
2099
1.68k
        if (!MatchingBG(BG))
2100
167
          continue;
2101
1.51k
2102
1.51k
        if (BG.StartIdx <= BG.EndIdx) {
2103
66.2k
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; 
++i64.7k
)
2104
64.7k
            Mask |= (UINT64_C(1) << i);
2105
1.51k
        } else {
2106
205
          for (unsigned i = BG.StartIdx; i < Bits.size(); 
++i200
)
2107
200
            Mask |= (UINT64_C(1) << i);
2108
89
          for (unsigned i = 0; i <= BG.EndIdx; 
++i84
)
2109
84
            Mask |= (UINT64_C(1) << i);
2110
5
        }
2111
1.51k
      }
2112
1.29k
2113
1.29k
      // We can use the 32-bit andi/andis technique if the mask does not
2114
1.29k
      // require any higher-order bits. This can save an instruction compared
2115
1.29k
      // to always using the general 64-bit technique.
2116
1.29k
      bool Use32BitInsts = isUInt<32>(Mask);
2117
1.29k
      // Compute the masks for andi/andis that would be necessary.
2118
1.29k
      unsigned ANDIMask = (Mask & UINT16_MAX),
2119
1.29k
               ANDISMask = (Mask >> 16) & UINT16_MAX;
2120
1.29k
2121
1.29k
      bool NeedsRotate = VRI.RLAmt || 
(16
VRI.Repl3216
&&
!isUInt<32>(Mask)3
);
2122
1.29k
2123
1.29k
      unsigned NumAndInsts = (unsigned) NeedsRotate +
2124
1.29k
                             (unsigned) (bool) Res;
2125
1.29k
      if (Use32BitInsts)
2126
227
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2127
227
                       (unsigned) (ANDIMask != 0 && 
ANDISMask != 0209
);
2128
1.06k
      else
2129
1.06k
        NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
2130
1.29k
2131
1.29k
      unsigned NumRLInsts = 0;
2132
1.29k
      bool FirstBG = true;
2133
1.29k
      bool MoreBG = false;
2134
1.68k
      for (auto &BG : BitGroups) {
2135
1.68k
        if (!MatchingBG(BG)) {
2136
167
          MoreBG = true;
2137
167
          continue;
2138
167
        }
2139
1.51k
        NumRLInsts +=
2140
1.51k
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2141
1.51k
                               !FirstBG);
2142
1.51k
        FirstBG = false;
2143
1.51k
      }
2144
1.29k
2145
1.29k
      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2146
1.29k
                        << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2147
1.29k
                        << "\n\t\t\tisel using masking: " << NumAndInsts
2148
1.29k
                        << " using rotates: " << NumRLInsts << "\n");
2149
1.29k
2150
1.29k
      // When we'd use andi/andis, we bias toward using the rotates (andi only
2151
1.29k
      // has a record form, and is cracked on POWER cores). However, when using
2152
1.29k
      // general 64-bit constant formation, bias toward the constant form,
2153
1.29k
      // because that exposes more opportunities for CSE.
2154
1.29k
      if (NumAndInsts > NumRLInsts)
2155
1.26k
        continue;
2156
28
      // When merging multiple bit groups, instruction or is used.
2157
28
      // But when rotate is used, rldimi can inert the rotated value into any
2158
28
      // register, so instruction or can be avoided.
2159
28
      if ((Use32BitInsts || 
MoreBG21
) &&
NumAndInsts == NumRLInsts12
)
2160
10
        continue;
2161
18
2162
18
      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2163
18
2164
18
      if (InstCnt) *InstCnt += NumAndInsts;
2165
18
2166
18
      SDValue VRot;
2167
18
      // We actually need to generate a rotation if we have a non-zero rotation
2168
18
      // factor or, in the Repl32 case, if we care about any of the
2169
18
      // higher-order replicated bits. In the latter case, we generate a mask
2170
18
      // backward so that it actually includes the entire 64 bits.
2171
18
      if (VRI.RLAmt || 
(0
VRI.Repl320
&&
!isUInt<32>(Mask)0
))
2172
18
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2173
18
                               VRI.Repl32 ? 
310
: 0, VRI.Repl32 ?
300
: 63);
2174
0
      else
2175
0
        VRot = VRI.V;
2176
18
2177
18
      SDValue TotalVal;
2178
18
      if (Use32BitInsts) {
2179
1
        assert((ANDIMask != 0 || ANDISMask != 0) &&
2180
1
               "No set bits in mask when using 32-bit ands for 64-bit value");
2181
1
2182
1
        SDValue ANDIVal, ANDISVal;
2183
1
        if (ANDIMask != 0)
2184
0
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2185
0
                                                   ExtendToInt64(VRot, dl),
2186
0
                                                   getI32Imm(ANDIMask, dl)),
2187
0
                            0);
2188
1
        if (ANDISMask != 0)
2189
1
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2190
1
                                                    ExtendToInt64(VRot, dl),
2191
1
                                                    getI32Imm(ANDISMask, dl)),
2192
1
                             0);
2193
1
2194
1
        if (!ANDIVal)
2195
1
          TotalVal = ANDISVal;
2196
0
        else if (!ANDISVal)
2197
0
          TotalVal = ANDIVal;
2198
0
        else
2199
0
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2200
0
                               ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2201
17
      } else {
2202
17
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2203
17
        TotalVal =
2204
17
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2205
17
                                         ExtendToInt64(VRot, dl), TotalVal),
2206
17
                  0);
2207
17
     }
2208
18
2209
18
      if (!Res)
2210
17
        Res = TotalVal;
2211
1
      else
2212
1
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2213
1
                                             ExtendToInt64(Res, dl), TotalVal),
2214
1
                      0);
2215
18
2216
18
      // Now, remove all groups with this underlying value and rotation
2217
18
      // factor.
2218
18
      eraseMatchingBitGroups(MatchingBG);
2219
18
    }
2220
1.23k
  }
2221
2222
  // Instruction selection for the 64-bit case.
2223
1.23k
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2224
1.23k
    SDLoc dl(N);
2225
1.23k
    SDValue Res;
2226
1.23k
2227
1.23k
    if (InstCnt) 
*InstCnt = 01.21k
;
2228
1.23k
2229
1.23k
    // Take care of cases that should use andi/andis first.
2230
1.23k
    SelectAndParts64(dl, Res, InstCnt);
2231
1.23k
2232
1.23k
    // If we've not yet selected a 'starting' instruction, and we have no zeros
2233
1.23k
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
2234
1.23k
    // number of groups), and start with this rotated value.
2235
1.23k
    if ((!NeedMask || 
LateMask1.21k
) &&
!Res629
) {
2236
629
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2237
629
      // groups will come first, and so the VRI representing the largest number
2238
629
      // of groups might not be first (it might be the first Repl32 groups).
2239
629
      unsigned MaxGroupsIdx = 0;
2240
629
      if (!ValueRotsVec[0].Repl32) {
2241
1.28k
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; 
++i654
)
2242
661
          if (ValueRotsVec[i].Repl32) {
2243
7
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2244
0
              MaxGroupsIdx = i;
2245
7
            break;
2246
7
          }
2247
627
      }
2248
629
2249
629
      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2250
629
      bool NeedsRotate = false;
2251
629
      if (VRI.RLAmt) {
2252
615
        NeedsRotate = true;
2253
615
      } else 
if (14
VRI.Repl3214
) {
2254
2
        for (auto &BG : BitGroups) {
2255
2
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2256
2
              BG.Repl32 != VRI.Repl32)
2257
0
            continue;
2258
2
2259
2
          // We don't need a rotate if the bit group is confined to the lower
2260
2
          // 32 bits.
2261
2
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2262
0
            continue;
2263
2
2264
2
          NeedsRotate = true;
2265
2
          break;
2266
2
        }
2267
2
      }
2268
629
2269
629
      if (NeedsRotate)
2270
617
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2271
617
                              VRI.Repl32 ? 
312
:
0615
, VRI.Repl32 ?
302
:
63615
,
2272
617
                              InstCnt);
2273
12
      else
2274
12
        Res = VRI.V;
2275
629
2276
629
      // Now, remove all groups with this underlying value and rotation factor.
2277
629
      if (Res)
2278
679
        
eraseMatchingBitGroups([VRI](const BitGroup &BG) 629
{
2279
679
          return BG.V == VRI.V && 
BG.RLAmt == VRI.RLAmt665
&&
2280
679
                 
BG.Repl32 == VRI.Repl32634
;
2281
679
        });
2282
629
    }
2283
1.23k
2284
1.23k
    // Because 64-bit rotates are more flexible than inserts, we might have a
2285
1.23k
    // preference regarding which one we do first (to save one instruction).
2286
1.23k
    if (!Res)
2287
1.01k
      
for (auto I = BitGroups.begin(), IE = BitGroups.end(); 590
I != IE;
++I425
) {
2288
594
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2289
594
                                false) <
2290
594
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2291
594
                                true)) {
2292
169
          if (I != BitGroups.begin()) {
2293
1
            BitGroup BG = *I;
2294
1
            BitGroups.erase(I);
2295
1
            BitGroups.insert(BitGroups.begin(), BG);
2296
1
          }
2297
169
2298
169
          break;
2299
169
        }
2300
594
      }
2301
1.23k
2302
1.23k
    // Insert the other groups (one at a time).
2303
1.23k
    for (auto &BG : BitGroups) {
2304
643
      if (!Res)
2305
590
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2306
590
                              BG.EndIdx, InstCnt);
2307
53
      else
2308
53
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2309
53
                                 BG.StartIdx, BG.EndIdx, InstCnt);
2310
643
    }
2311
1.23k
2312
1.23k
    if (LateMask) {
2313
607
      uint64_t Mask = getZerosMask();
2314
607
2315
607
      // We can use the 32-bit andi/andis technique if the mask does not
2316
607
      // require any higher-order bits. This can save an instruction compared
2317
607
      // to always using the general 64-bit technique.
2318
607
      bool Use32BitInsts = isUInt<32>(Mask);
2319
607
      // Compute the masks for andi/andis that would be necessary.
2320
607
      unsigned ANDIMask = (Mask & UINT16_MAX),
2321
607
               ANDISMask = (Mask >> 16) & UINT16_MAX;
2322
607
2323
607
      if (Use32BitInsts) {
2324
186
        assert((ANDIMask != 0 || ANDISMask != 0) &&
2325
186
               "No set bits in mask when using 32-bit ands for 64-bit value");
2326
186
2327
186
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2328
186
                                 (unsigned) (ANDISMask != 0) +
2329
186
                                 (unsigned) (ANDIMask != 0 && 
ANDISMask != 0184
);
2330
186
2331
186
        SDValue ANDIVal, ANDISVal;
2332
186
        if (ANDIMask != 0)
2333
184
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2334
184
                              ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
2335
186
        if (ANDISMask != 0)
2336
46
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2337
46
                               ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
2338
186
2339
186
        if (!ANDIVal)
2340
2
          Res = ANDISVal;
2341
184
        else if (!ANDISVal)
2342
140
          Res = ANDIVal;
2343
44
        else
2344
44
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2345
44
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2346
421
      } else {
2347
421
        if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
2348
421
2349
421
        SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2350
421
        Res =
2351
421
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2352
421
                                         ExtendToInt64(Res, dl), MaskVal), 0);
2353
421
      }
2354
607
    }
2355
1.23k
2356
1.23k
    return Res.getNode();
2357
1.23k
  }
2358
2359
1.82k
  // Run one selection attempt with the given masking strategy: rebuild the
  // bit groups and rotation info, then dispatch on the width of Bits.
  // Returns nullptr when no bit groups were collected. InstCnt, if non-null,
  // is forwarded to the width-specific selector.
  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
    // Fill in BitGroups.
    collectBitGroups(LateMask);
    if (BitGroups.empty())
      return nullptr;

    // For 64-bit values, figure out when we can use 32-bit instructions.
    if (Bits.size() == 64)
      assignRepl32BitGroups();

    // Fill in ValueRotsVec.
    collectValueRotInfo();

    if (Bits.size() == 32)
      return Select32(N, LateMask, InstCnt);

    assert(Bits.size() == 64 && "Not 64 bits here?");
    return Select64(N, LateMask, InstCnt);
  }
2381
2382
989
  // Remove from BitGroups every group for which the predicate F returns true.
  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
    auto FirstRemoved = remove_if(BitGroups, F);
    BitGroups.erase(FirstRemoved, BitGroups.end());
  }
2385
2386
  SmallVector<ValueBit, 64> Bits;
2387
2388
  bool NeedMask;
2389
  SmallVector<unsigned, 64> RLAmt;
2390
2391
  SmallVector<BitGroup, 16> BitGroups;
2392
2393
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2394
  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2395
2396
  SelectionDAG *CurDAG;
2397
2398
public:
2399
  // Remember the DAG in which replacement machine nodes will be created.
  BitPermutationSelector(SelectionDAG *DAG) : CurDAG(DAG) {}
2401
2402
  // Here we try to match complex bit permutations into a set of
2403
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2404
  // known to produce optimal code for common cases (like i32 byte swapping).
2405
1.94k
  SDNode *Select(SDNode *N) {
2406
1.94k
    Memoizer.clear();
2407
1.94k
    auto Result =
2408
1.94k
        getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2409
1.94k
    if (!Result.first)
2410
990
      return nullptr;
2411
957
    Bits = std::move(*Result.second);
2412
957
2413
957
    LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2414
957
                         " selection for:    ");
2415
957
    LLVM_DEBUG(N->dump(CurDAG));
2416
957
2417
957
    // Fill it RLAmt and set NeedMask.
2418
957
    computeRotationAmounts();
2419
957
2420
957
    if (!NeedMask)
2421
87
      return Select(N, false);
2422
870
2423
870
    // We currently have two techniques for handling results with zeros: early
2424
870
    // masking (the default) and late masking. Late masking is sometimes more
2425
870
    // efficient, but because the structure of the bit groups is different, it
2426
870
    // is hard to tell without generating both and comparing the results. With
2427
870
    // late masking, we ignore zeros in the resulting value when inserting each
2428
870
    // set of bit groups, and then mask in the zeros at the end. With early
2429
870
    // masking, we only insert the non-zero parts of the result at every step.
2430
870
2431
870
    unsigned InstCnt = 0, InstCntLateMask = 0;
2432
870
    LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2433
870
    SDNode *RN = Select(N, false, &InstCnt);
2434
870
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2435
870
2436
870
    LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2437
870
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
2438
870
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2439
870
                      << " instructions\n");
2440
870
2441
870
    if (InstCnt <= InstCntLateMask) {
2442
869
      LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2443
869
      return RN;
2444
869
    }
2445
1
2446
1
    LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2447
1
    return RNLM;
2448
1
  }
2449
};
2450
2451
class IntegerCompareEliminator {
2452
  SelectionDAG *CurDAG;
2453
  PPCDAGToDAGISel *S;
2454
  // Conversion type for interpreting results of a 32-bit instruction as
2455
  // a 64-bit value or vice versa.
2456
  enum ExtOrTruncConversion { Ext, Trunc };
2457
2458
  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2459
  // in a GPR.
2460
  // ZExtOrig - use the original condition code, zero-extend value
2461
  // ZExtInvert - invert the condition code, zero-extend value
2462
  // SExtOrig - use the original condition code, sign-extend value
2463
  // SExtInvert - invert the condition code, sign-extend value
2464
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2465
2466
  // Comparisons against zero to emit GPR code sequences for. Each of these
2467
  // sequences may need to be emitted for two or more equivalent patterns.
2468
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2469
  // matters as well as the extension type: sext (-1/0), zext (1/0).
2470
  // GEZExt - (zext (LHS >= 0))
2471
  // GESExt - (sext (LHS >= 0))
2472
  // LEZExt - (zext (LHS <= 0))
2473
  // LESExt - (sext (LHS <= 0))
2474
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2475
2476
  SDNode *tryEXTEND(SDNode *N);
2477
  SDNode *tryLogicOpOfCompares(SDNode *N);
2478
  SDValue computeLogicOpInGPR(SDValue LogicOp);
2479
  SDValue signExtendInputIfNeeded(SDValue Input);
2480
  SDValue zeroExtendInputIfNeeded(SDValue Input);
2481
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2482
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2483
                                        ZeroCompare CmpTy);
2484
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2485
                              int64_t RHSValue, SDLoc dl);
2486
 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2487
                              int64_t RHSValue, SDLoc dl);
2488
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2489
                              int64_t RHSValue, SDLoc dl);
2490
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2491
                              int64_t RHSValue, SDLoc dl);
2492
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2493
2494
public:
2495
  IntegerCompareEliminator(SelectionDAG *DAG,
2496
2.49k
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2497
2.49k
    assert(CurDAG->getTargetLoweringInfo()
2498
2.49k
           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2499
2.49k
           "Only expecting to use this on 64 bit targets.");
2500
2.49k
  }
2501
2.49k
  SDNode *Select(SDNode *N) {
2502
2.49k
    if (CmpInGPR == ICGPR_None)
2503
0
      return nullptr;
2504
2.49k
    switch (N->getOpcode()) {
2505
2.49k
    
default: break0
;
2506
2.49k
    case ISD::ZERO_EXTEND:
2507
750
      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2508
750
          CmpInGPR == ICGPR_SextI64)
2509
0
        return nullptr;
2510
750
      LLVM_FALLTHROUGH;
2511
1.59k
    case ISD::SIGN_EXTEND:
2512
1.59k
      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2513
1.59k
          CmpInGPR == ICGPR_ZextI64)
2514
0
        return nullptr;
2515
1.59k
      return tryEXTEND(N);
2516
1.59k
    case ISD::AND:
2517
903
    case ISD::OR:
2518
903
    case ISD::XOR:
2519
903
      return tryLogicOpOfCompares(N);
2520
0
    }
2521
0
    return nullptr;
2522
0
  }
2523
};
2524
2525
1.61k
static bool isLogicOp(unsigned Opc) {
2526
1.61k
  return Opc == ISD::AND || 
Opc == ISD::OR1.57k
||
Opc == ISD::XOR1.55k
;
2527
1.61k
}
2528
// The obvious case for wanting to keep the value in a GPR. Namely, the
2529
// result of the comparison is actually needed in a GPR.
2530
1.59k
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2531
1.59k
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2532
1.59k
          N->getOpcode() == ISD::SIGN_EXTEND) &&
2533
1.59k
         "Expecting a zero/sign extend node!");
2534
1.59k
  SDValue WideRes;
2535
1.59k
  // If we are zero-extending the result of a logical operation on i1
2536
1.59k
  // values, we can keep the values in GPRs.
2537
1.59k
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
2538
1.59k
      
N->getOperand(0).getValueType() == MVT::i139
&&
2539
1.59k
      
N->getOpcode() == ISD::ZERO_EXTEND24
)
2540
24
    WideRes = computeLogicOpInGPR(N->getOperand(0));
2541
1.56k
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2542
470
    return nullptr;
2543
1.09k
  else
2544
1.09k
    WideRes =
2545
1.09k
      getSETCCInGPR(N->getOperand(0),
2546
1.09k
                    N->getOpcode() == ISD::SIGN_EXTEND ?
2547
567
                    
SetccInGPROpts::SExtOrig531
: SetccInGPROpts::ZExtOrig);
2548
1.59k
2549
1.59k
  
if (1.12k
!WideRes1.12k
)
2550
30
    return nullptr;
2551
1.09k
2552
1.09k
  SDLoc dl(N);
2553
1.09k
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
2554
1.09k
  bool Output32Bit = N->getValueType(0) == MVT::i32;
2555
1.09k
2556
1.09k
  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 
1530
:
0562
;
2557
1.09k
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 
0530
:
1562
;
2558
1.09k
2559
1.09k
  SDValue ConvOp = WideRes;
2560
1.09k
  if (Input32Bit != Output32Bit)
2561
413
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? 
ExtOrTruncConversion::Ext273
:
2562
413
                           
ExtOrTruncConversion::Trunc140
);
2563
1.09k
  return ConvOp.getNode();
2564
1.09k
}
2565
2566
// Attempt to perform logical operations on the results of comparisons while
2567
// keeping the values in GPRs. Without doing so, these would end up being
2568
// lowered to CR-logical operations which suffer from significant latency and
2569
// low ILP.
2570
903
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2571
903
  if (N->getValueType(0) != MVT::i1)
2572
903
    return nullptr;
2573
0
  assert(isLogicOp(N->getOpcode()) &&
2574
0
         "Expected a logic operation on setcc results.");
2575
0
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2576
0
  if (!LoweredLogical)
2577
0
    return nullptr;
2578
0
2579
0
  SDLoc dl(N);
2580
0
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2581
0
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2582
0
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2583
0
  SDValue LHS = LoweredLogical.getOperand(0);
2584
0
  SDValue RHS = LoweredLogical.getOperand(1);
2585
0
  SDValue WideOp;
2586
0
  SDValue OpToConvToRecForm;
2587
0
2588
0
  // Look through any 32-bit to 64-bit implicit extend nodes to find the
2589
0
  // opcode that is input to the XORI.
2590
0
  if (IsBitwiseNegate &&
2591
0
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2592
0
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2593
0
  else if (IsBitwiseNegate)
2594
0
    // If the input to the XORI isn't an extension, that's what we're after.
2595
0
    OpToConvToRecForm = LoweredLogical.getOperand(0);
2596
0
  else
2597
0
    // If this is not an XORI, it is a reg-reg logical op and we can convert
2598
0
    // it to record-form.
2599
0
    OpToConvToRecForm = LoweredLogical;
2600
0
2601
0
  // Get the record-form version of the node we're looking to use to get the
2602
0
  // CR result from.
2603
0
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2604
0
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2605
0
2606
0
  // Convert the right node to record-form. This is either the logical we're
2607
0
  // looking at or it is the input node to the negation (if we're looking at
2608
0
  // a bitwise negation).
2609
0
  if (NewOpc != -1 && IsBitwiseNegate) {
2610
0
    // The input to the XORI has a record-form. Use it.
2611
0
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2612
0
           "Expected a PPC::XORI8 only for bitwise negation.");
2613
0
    // Emit the record-form instruction.
2614
0
    std::vector<SDValue> Ops;
2615
0
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2616
0
      Ops.push_back(OpToConvToRecForm.getOperand(i));
2617
0
2618
0
    WideOp =
2619
0
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
2620
0
                                     OpToConvToRecForm.getValueType(),
2621
0
                                     MVT::Glue, Ops), 0);
2622
0
  } else {
2623
0
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
2624
0
           "No record form available for AND8/OR8/XOR8?");
2625
0
    WideOp =
2626
0
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
2627
0
                                     MVT::i64, MVT::Glue, LHS, RHS), 0);
2628
0
  }
2629
0
2630
0
  // Select this node to a single bit from CR0 set by the record-form node
2631
0
  // just created. For bitwise negation, use the EQ bit which is the equivalent
2632
0
  // of negating the result (i.e. it is a bit set when the result of the
2633
0
  // operation is zero).
2634
0
  SDValue SRIdxVal =
2635
0
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2636
0
  SDValue CRBit =
2637
0
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2638
0
                                   MVT::i1, CR0Reg, SRIdxVal,
2639
0
                                   WideOp.getValue(1)), 0);
2640
0
  return CRBit.getNode();
2641
0
}
2642
2643
// Lower a logical operation on i1 values into a GPR sequence if possible.
2644
// The result can be kept in a GPR if requested.
2645
// Three types of inputs can be handled:
2646
// - SETCC
2647
// - TRUNCATE
2648
// - Logical operation (AND/OR/XOR)
2649
// There is also a special case that is handled (namely a complement operation
2650
// achieved with xor %a, -1).
2651
43
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2652
43
  assert(isLogicOp(LogicOp.getOpcode()) &&
2653
43
        "Can only handle logic operations here.");
2654
43
  assert(LogicOp.getValueType() == MVT::i1 &&
2655
43
         "Can only handle logic operations on i1 values here.");
2656
43
  SDLoc dl(LogicOp);
2657
43
  SDValue LHS, RHS;
2658
43
2659
43
 // Special case: xor %a, -1
2660
43
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2661
43
2662
43
  // Produces a GPR sequence for each operand of the binary logic operation.
2663
43
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2664
43
  // the value in a GPR and for logic operations, it will recursively produce
2665
43
  // a GPR sequence for the operation.
2666
86
 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2667
86
    unsigned OperandOpcode = Operand.getOpcode();
2668
86
    if (OperandOpcode == ISD::SETCC)
2669
59
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2670
27
    else if (OperandOpcode == ISD::TRUNCATE) {
2671
7
      SDValue InputOp = Operand.getOperand(0);
2672
7
     EVT InVT = InputOp.getValueType();
2673
7
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? 
PPC::RLDICL_320
:
2674
7
                                            PPC::RLDICL, dl, InVT, InputOp,
2675
7
                                            S->getI64Imm(0, dl),
2676
7
                                            S->getI64Imm(63, dl)), 0);
2677
20
    } else if (isLogicOp(OperandOpcode))
2678
19
      return computeLogicOpInGPR(Operand);
2679
1
    return SDValue();
2680
1
  };
2681
43
  LHS = getLogicOperand(LogicOp.getOperand(0));
2682
43
  RHS = getLogicOperand(LogicOp.getOperand(1));
2683
43
2684
43
  // If a GPR sequence can't be produced for the LHS we can't proceed.
2685
43
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
2686
43
  // a bitwise negation operation.
2687
43
  if (!LHS || 
(17
!RHS17
&&
!IsBitwiseNegation2
))
2688
28
    return SDValue();
2689
15
2690
15
  NumLogicOpsOnComparison++;
2691
15
2692
15
  // We will use the inputs as 64-bit values.
2693
15
  if (LHS.getValueType() == MVT::i32)
2694
7
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2695
15
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2696
6
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2697
15
2698
15
  unsigned NewOpc;
2699
15
  switch (LogicOp.getOpcode()) {
2700
15
  
default: 0
llvm_unreachable0
("Unknown logic operation.");
2701
15
  
case ISD::AND: NewOpc = PPC::AND8; break8
;
2702
15
  
case ISD::OR: NewOpc = PPC::OR8; break7
;
2703
15
  
case ISD::XOR: NewOpc = PPC::XOR8; break0
;
2704
15
  }
2705
15
2706
15
  if (IsBitwiseNegation) {
2707
0
    RHS = S->getI64Imm(1, dl);
2708
0
    NewOpc = PPC::XORI8;
2709
0
  }
2710
15
2711
15
  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2712
15
2713
15
}
2714
2715
/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2716
/// Otherwise just reinterpret it as a 64-bit value.
2717
/// Useful when emitting comparison code for 32-bit values without using
2718
/// the compare instruction (which only considers the lower 32-bits).
2719
351
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2720
351
  assert(Input.getValueType() == MVT::i32 &&
2721
351
         "Can only sign-extend 32-bit values here.");
2722
351
  unsigned Opc = Input.getOpcode();
2723
351
2724
351
  // The value was sign extended and then truncated to 32-bits. No need to
2725
351
  // sign extend it again.
2726
351
  if (Opc == ISD::TRUNCATE &&
2727
351
      
(314
Input.getOperand(0).getOpcode() == ISD::AssertSext314
||
2728
314
       
Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND0
))
2729
314
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2730
37
2731
37
  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2732
37
  // The input is a sign-extending load. All ppc sign-extending loads
2733
37
  // sign-extend to the full 64-bits.
2734
37
  if (InputLoad && 
InputLoad->getExtensionType() == ISD::SEXTLOAD0
)
2735
0
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2736
37
2737
37
  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2738
37
  // We don't sign-extend constants.
2739
37
  if (InputConst)
2740
24
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2741
13
2742
13
  SDLoc dl(Input);
2743
13
  SignExtensionsAdded++;
2744
13
  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2745
13
                                        MVT::i64, Input), 0);
2746
13
}
2747
2748
/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2749
/// Otherwise just reinterpret it as a 64-bit value.
2750
/// Useful when emitting comparison code for 32-bit values without using
2751
/// the compare instruction (which only considers the lower 32-bits).
2752
214
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2753
214
  assert(Input.getValueType() == MVT::i32 &&
2754
214
         "Can only zero-extend 32-bit values here.");
2755
214
  unsigned Opc = Input.getOpcode();
2756
214
2757
214
  // The only condition under which we can omit the actual extend instruction:
2758
214
  // - The value is a positive constant
2759
214
  // - The value comes from a load that isn't a sign-extending load
2760
214
  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2761
214
  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2762
214
    
(192
Input.getOperand(0).getOpcode() == ISD::AssertZext192
||
2763
192
     
Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND0
);
2764
214
  if (IsTruncateOfZExt)
2765
192
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2766
22
2767
22
  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2768
22
  if (InputConst && 
InputConst->getSExtValue() >= 00
)
2769
0
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2770
22
2771
22
  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2772
22
  // The input is a load that doesn't sign-extend (it will be zero-extended).
2773
22
  if (InputLoad && 
InputLoad->getExtensionType() != ISD::SEXTLOAD0
)
2774
0
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2775
22
2776
22
  // None of the above, need to zero-extend.
2777
22
  SDLoc dl(Input);
2778
22
  ZeroExtensionsAdded++;
2779
22
  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2780
22
                                        S->getI64Imm(0, dl),
2781
22
                                        S->getI64Imm(32, dl)), 0);
2782
22
}
2783
2784
// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2785
// course not actual zero/sign extensions that will generate machine code,
2786
// they're just a way to reinterpret a 32 bit value in a register as a
2787
// 64 bit value and vice-versa.
2788
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2789
956
                                                ExtOrTruncConversion Conv) {
2790
956
  SDLoc dl(NatWidthRes);
2791
956
2792
956
  // For reinterpreting 32-bit values as 64 bit values, we generate
2793
956
  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2794
956
  if (Conv == ExtOrTruncConversion::Ext) {
2795
816
    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2796
816
    SDValue SubRegIdx =
2797
816
      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2798
816
    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2799
816
                                          ImDef, NatWidthRes, SubRegIdx), 0);
2800
816
  }
2801
140
2802
140
  assert(Conv == ExtOrTruncConversion::Trunc &&
2803
140
         "Unknown convertion between 32 and 64 bit values.");
2804
140
  // For reinterpreting 64-bit values as 32-bit values, we just need to
2805
140
  // EXTRACT_SUBREG (i.e. extract the low word).
2806
140
  SDValue SubRegIdx =
2807
140
    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2808
140
  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2809
140
                                        NatWidthRes, SubRegIdx), 0);
2810
140
}
2811
2812
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2813
// Handle both zero-extensions and sign-extensions.
2814
SDValue
2815
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2816
21
                                                         ZeroCompare CmpTy) {
2817
21
  EVT InVT = LHS.getValueType();
2818
21
  bool Is32Bit = InVT == MVT::i32;
2819
21
  SDValue ToExtend;
2820
21
2821
21
  // Produce the value that needs to be either zero or sign extended.
2822
21
  switch (CmpTy) {
2823
21
  case ZeroCompare::GEZExt:
2824
4
  case ZeroCompare::GESExt:
2825
4
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : 
PPC::NOR80
,
2826
4
                                              dl, InVT, LHS, LHS), 0);
2827
4
    break;
2828
17
  case ZeroCompare::LEZExt:
2829
17
  case ZeroCompare::LESExt: {
2830
17
    if (Is32Bit) {
2831
1
      // Upper 32 bits cannot be undefined for this sequence.
2832
1
      LHS = signExtendInputIfNeeded(LHS);
2833
1
      SDValue Neg =
2834
1
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2835
1
      ToExtend =
2836
1
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2837
1
                                       Neg, S->getI64Imm(1, dl),
2838
1
                                       S->getI64Imm(63, dl)), 0);
2839
16
    } else {
2840
16
      SDValue Addi =
2841
16
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2842
16
                                       S->getI64Imm(~0ULL, dl)), 0);
2843
16
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2844
16
                                                Addi, LHS), 0);
2845
16
    }
2846
17
    break;
2847
21
  }
2848
21
  }
2849
21
2850
21
  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2851
21
  if (!Is32Bit &&
2852
21
      
(16
CmpTy == ZeroCompare::GEZExt16
||
CmpTy == ZeroCompare::LEZExt16
))
2853
8
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2854
8
                                          ToExtend, S->getI64Imm(1, dl),
2855
8
                                          S->getI64Imm(63, dl)), 0);
2856
13
  if (!Is32Bit &&
2857
13
      
(8
CmpTy == ZeroCompare::GESExt8
||
CmpTy == ZeroCompare::LESExt8
))
2858
8
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2859
8
                                          S->getI64Imm(63, dl)), 0);
2860
5
2861
5
  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2862
5
  // For 32-bit sequences, the extensions differ between GE/LE cases.
2863
5
  switch (CmpTy) {
2864
5
  case ZeroCompare::GEZExt: {
2865
3
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2866
3
                           S->getI32Imm(31, dl) };
2867
3
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2868
3
                                          ShiftOps), 0);
2869
5
  }
2870
5
  case ZeroCompare::GESExt:
2871
1
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2872
1
                                          S->getI32Imm(31, dl)), 0);
2873
5
  case ZeroCompare::LEZExt:
2874
1
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2875
1
                                          S->getI32Imm(1, dl)), 0);
2876
5
  case ZeroCompare::LESExt:
2877
0
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2878
0
                                          S->getI32Imm(-1, dl)), 0);
2879
0
  }
2880
0
2881
0
  // The above case covers all the enumerators so it can't have a default clause
2882
0
  // to avoid compiler warnings.
2883
0
  llvm_unreachable("Unknown zero-comparison type.");
2884
0
}
2885
2886
/// Produces a zero-extended result of comparing two 32-bit values according to
2887
/// the passed condition code.
2888
SDValue
2889
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2890
                                              ISD::CondCode CC,
2891
378
                                              int64_t RHSValue, SDLoc dl) {
2892
378
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2893
378
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
2894
0
    return SDValue();
2895
378
  bool IsRHSZero = RHSValue == 0;
2896
378
  bool IsRHSOne = RHSValue == 1;
2897
378
  bool IsRHSNegOne = RHSValue == -1LL;
2898
378
  switch (CC) {
2899
378
  
default: return SDValue()0
;
2900
378
  case ISD::SETEQ: {
2901
153
    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2902
153
    // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
2903
153
    SDValue Xor = IsRHSZero ? 
LHS103
:
2904
153
      
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0)50
;
2905
153
    SDValue Clz =
2906
153
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2907
153
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2908
153
      S->getI32Imm(31, dl) };
2909
153
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2910
153
                                          ShiftOps), 0);
2911
378
  }
2912
378
  case ISD::SETNE: {
2913
117
    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2914
117
    // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
2915
117
    SDValue Xor = IsRHSZero ? 
LHS80
:
2916
117
      
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0)37
;
2917
117
    SDValue Clz =
2918
117
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2919
117
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2920
117
      S->getI32Imm(31, dl) };
2921
117
    SDValue Shift =
2922
117
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2923
117
    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2924
117
                                          S->getI32Imm(1, dl)), 0);
2925
378
  }
2926
378
  case ISD::SETGE: {
2927
24
    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2928
24
    // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
2929
24
    if(IsRHSZero)
2930
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2931
24
2932
24
    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2933
24
    // by swapping inputs and falling through.
2934
24
    std::swap(LHS, RHS);
2935
24
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2936
24
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
2937
24
    LLVM_FALLTHROUGH;
2938
24
  }
2939
50
  case ISD::SETLE: {
2940
50
    if (CmpInGPR == ICGPR_NonExtIn)
2941
0
      return SDValue();
2942
50
    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2943
50
    // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
2944
50
    if(IsRHSZero) {
2945
0
      if (CmpInGPR == ICGPR_NonExtIn)
2946
0
        return SDValue();
2947
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2948
0
    }
2949
50
2950
50
    // The upper 32-bits of the register can't be undefined for this sequence.
2951
50
    LHS = signExtendInputIfNeeded(LHS);
2952
50
    RHS = signExtendInputIfNeeded(RHS);
2953
50
    SDValue Sub =
2954
50
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2955
50
    SDValue Shift =
2956
50
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2957
50
                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2958
50
              0);
2959
50
    return
2960
50
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2961
50
                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2962
50
  }
2963
50
  case ISD::SETGT: {
2964
30
    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2965
30
    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2966
30
    // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
2967
30
    // Handle SETLT -1 (which is equivalent to SETGE 0).
2968
30
    if (IsRHSNegOne)
2969
3
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2970
27
2971
27
    if (IsRHSZero) {
2972
12
      if (CmpInGPR == ICGPR_NonExtIn)
2973
0
        return SDValue();
2974
12
      // The upper 32-bits of the register can't be undefined for this sequence.
2975
12
      LHS = signExtendInputIfNeeded(LHS);
2976
12
      RHS = signExtendInputIfNeeded(RHS);
2977
12
      SDValue Neg =
2978
12
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2979
12
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2980
12
                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2981
12
    }
2982
15
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2983
15
    // (%b < %a) by swapping inputs and falling through.
2984
15
    std::swap(LHS, RHS);
2985
15
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2986
15
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
2987
15
    IsRHSOne = RHSConst && 
RHSConst->getSExtValue() == 10
;
2988
15
    LLVM_FALLTHROUGH;
2989
15
  }
2990
33
  case ISD::SETLT: {
2991
33
    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2992
33
    // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
2993
33
    // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
2994
33
    // Handle SETLT 1 (which is equivalent to SETLE 0).
2995
33
    if (IsRHSOne) {
2996
1
      if (CmpInGPR == ICGPR_NonExtIn)
2997
0
        return SDValue();
2998
1
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2999
1
    }
3000
32
3001
32
    if (IsRHSZero) {
3002
3
      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3003
3
                             S->getI32Imm(31, dl) };
3004
3
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3005
3
                                            ShiftOps), 0);
3006
3
    }
3007
29
3008
29
    if (CmpInGPR == ICGPR_NonExtIn)
3009
0
      return SDValue();
3010
29
    // The upper 32-bits of the register can't be undefined for this sequence.
3011
29
    LHS = signExtendInputIfNeeded(LHS);
3012
29
    RHS = signExtendInputIfNeeded(RHS);
3013
29
    SDValue SUBFNode =
3014
29
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3015
29
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3016
29
                                    SUBFNode, S->getI64Imm(1, dl),
3017
29
                                    S->getI64Imm(63, dl)), 0);
3018
29
  }
3019
29
  case ISD::SETUGE:
3020
0
    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3021
0
    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3022
0
    std::swap(LHS, RHS);
3023
0
    LLVM_FALLTHROUGH;
3024
2
  case ISD::SETULE: {
3025
2
    if (CmpInGPR == ICGPR_NonExtIn)
3026
0
      return SDValue();
3027
2
    // The upper 32-bits of the register can't be undefined for this sequence.
3028
2
    LHS = zeroExtendInputIfNeeded(LHS);
3029
2
    RHS = zeroExtendInputIfNeeded(RHS);
3030
2
    SDValue Subtract =
3031
2
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3032
2
    SDValue SrdiNode =
3033
2
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3034
2
                                          Subtract, S->getI64Imm(1, dl),
3035
2
                                          S->getI64Imm(63, dl)), 0);
3036
2
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3037
2
                                            S->getI32Imm(1, dl)), 0);
3038
2
  }
3039
4
  case ISD::SETUGT:
3040
4
    // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3041
4
    // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3042
4
    std::swap(LHS, RHS);
3043
4
    LLVM_FALLTHROUGH;
3044
8
  case ISD::SETULT: {
3045
8
    if (CmpInGPR == ICGPR_NonExtIn)
3046
0
      return SDValue();
3047
8
    // The upper 32-bits of the register can't be undefined for this sequence.
3048
8
    LHS = zeroExtendInputIfNeeded(LHS);
3049
8
    RHS = zeroExtendInputIfNeeded(RHS);
3050
8
    SDValue Subtract =
3051
8
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3052
8
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3053
8
                                          Subtract, S->getI64Imm(1, dl),
3054
8
                                          S->getI64Imm(63, dl)), 0);
3055
8
  }
3056
378
  }
3057
378
}
3058
3059
/// Produces a sign-extended result of comparing two 32-bit values according to
3060
/// the passed condition code.
3061
SDValue
3062
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3063
                                              ISD::CondCode CC,
3064
385
                                              int64_t RHSValue, SDLoc dl) {
3065
385
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3066
385
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
3067
0
    return SDValue();
3068
385
  bool IsRHSZero = RHSValue == 0;
3069
385
  bool IsRHSOne = RHSValue == 1;
3070
385
  bool IsRHSNegOne = RHSValue == -1LL;
3071
385
3072
385
  switch (CC) {
3073
385
  
default: return SDValue()0
;
3074
385
  case ISD::SETEQ: {
3075
120
    // (sext (setcc %a, %b, seteq)) ->
3076
120
    //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3077
120
    // (sext (setcc %a, 0, seteq)) ->
3078
120
    //   (ashr (shl (ctlz %a), 58), 63)
3079
120
    SDValue CountInput = IsRHSZero ? 
LHS72
:
3080
120
      
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0)48
;
3081
120
    SDValue Cntlzw =
3082
120
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3083
120
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3084
120
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3085
120
    SDValue Slwi =
3086
120
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3087
120
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3088
385
  }
3089
385
  case ISD::SETNE: {
3090
75
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3091
75
    // flip the bit, finally take 2's complement.
3092
75
    // (sext (setcc %a, %b, setne)) ->
3093
75
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3094
75
    // Same as above, but the first xor is not needed.
3095
75
    // (sext (setcc %a, 0, setne)) ->
3096
75
    //   (neg (xor (lshr (ctlz %a), 5), 1))
3097
75
    SDValue Xor = IsRHSZero ? 
LHS48
:
3098
75
      
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0)27
;
3099
75
    SDValue Clz =
3100
75
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3101
75
    SDValue ShiftOps[] =
3102
75
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3103
75
    SDValue Shift =
3104
75
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3105
75
    SDValue Xori =
3106
75
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3107
75
                                     S->getI32Imm(1, dl)), 0);
3108
75
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3109
385
  }
3110
385
  case ISD::SETGE: {
3111
24
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3112
24
    // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
3113
24
    if (IsRHSZero)
3114
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3115
24
3116
24
    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3117
24
    // by swapping inputs and falling through.
3118
24
    std::swap(LHS, RHS);
3119
24
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3120
24
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3121
24
    LLVM_FALLTHROUGH;
3122
24
  }
3123
48
  case ISD::SETLE: {
3124
48
    if (CmpInGPR == ICGPR_NonExtIn)
3125
0
      return SDValue();
3126
48
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3127
48
    // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
3128
48
    if (IsRHSZero)
3129
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3130
48
3131
48
    // The upper 32-bits of the register can't be undefined for this sequence.
3132
48
    LHS = signExtendInputIfNeeded(LHS);
3133
48
    RHS = signExtendInputIfNeeded(RHS);
3134
48
    SDValue SUBFNode =
3135
48
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3136
48
                                     LHS, RHS), 0);
3137
48
    SDValue Srdi =
3138
48
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3139
48
                                     SUBFNode, S->getI64Imm(1, dl),
3140
48
                                     S->getI64Imm(63, dl)), 0);
3141
48
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3142
48
                                          S->getI32Imm(-1, dl)), 0);
3143
48
  }
3144
48
  case ISD::SETGT: {
3145
25
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3146
25
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3147
25
    // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
3148
25
    if (IsRHSNegOne)
3149
1
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3150
24
    if (IsRHSZero) {
3151
12
      if (CmpInGPR == ICGPR_NonExtIn)
3152
0
        return SDValue();
3153
12
      // The upper 32-bits of the register can't be undefined for this sequence.
3154
12
      LHS = signExtendInputIfNeeded(LHS);
3155
12
      RHS = signExtendInputIfNeeded(RHS);
3156
12
      SDValue Neg =
3157
12
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3158
12
        return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3159
12
                                              S->getI64Imm(63, dl)), 0);
3160
12
    }
3161
12
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3162
12
    // (%b < %a) by swapping inputs and falling through.
3163
12
    std::swap(LHS, RHS);
3164
12
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3165
12
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3166
12
    IsRHSOne = RHSConst && 
RHSConst->getSExtValue() == 10
;
3167
12
    LLVM_FALLTHROUGH;
3168
12
  }
3169
32
  case ISD::SETLT: {
3170
32
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3171
32
    // (sext (setcc %a, 1, setgt))  -> (add (lshr (- %a), 63), -1)
3172
32
    // (sext (setcc %a, 0, setgt))  -> (ashr %a, 31)
3173
32
    if (IsRHSOne) {
3174
0
      if (CmpInGPR == ICGPR_NonExtIn)
3175
0
        return SDValue();
3176
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3177
0
    }
3178
32
    if (IsRHSZero)
3179
8
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3180
8
                                            S->getI32Imm(31, dl)), 0);
3181
24
3182
24
    if (CmpInGPR == ICGPR_NonExtIn)
3183
0
      return SDValue();
3184
24
    // The upper 32-bits of the register can't be undefined for this sequence.
3185
24
    LHS = signExtendInputIfNeeded(LHS);
3186
24
    RHS = signExtendInputIfNeeded(RHS);
3187
24
    SDValue SUBFNode =
3188
24
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3189
24
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3190
24
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
3191
24
  }
3192
24
  case ISD::SETUGE:
3193
24
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3194
24
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3195
24
    std::swap(LHS, RHS);
3196
24
    LLVM_FALLTHROUGH;
3197
48
  case ISD::SETULE: {
3198
48
    if (CmpInGPR == ICGPR_NonExtIn)
3199
0
      return SDValue();
3200
48
    // The upper 32-bits of the register can't be undefined for this sequence.
3201
48
    LHS = zeroExtendInputIfNeeded(LHS);
3202
48
    RHS = zeroExtendInputIfNeeded(RHS);
3203
48
    SDValue Subtract =
3204
48
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3205
48
    SDValue Shift =
3206
48
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3207
48
                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3208
48
              0);
3209
48
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3210
48
                                          S->getI32Imm(-1, dl)), 0);
3211
48
  }
3212
48
  case ISD::SETUGT:
3213
24
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3214
24
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3215
24
    std::swap(LHS, RHS);
3216
24
    LLVM_FALLTHROUGH;
3217
49
  case ISD::SETULT: {
3218
49
    if (CmpInGPR == ICGPR_NonExtIn)
3219
0
      return SDValue();
3220
49
    // The upper 32-bits of the register can't be undefined for this sequence.
3221
49
    LHS = zeroExtendInputIfNeeded(LHS);
3222
49
    RHS = zeroExtendInputIfNeeded(RHS);
3223
49
    SDValue Subtract =
3224
49
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3225
49
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3226
49
                                          Subtract, S->getI64Imm(63, dl)), 0);
3227
49
  }
3228
385
  }
3229
385
}
3230
3231
/// Produces a zero-extended result of comparing two 64-bit values according to
3232
/// the passed condition code.
3233
SDValue
3234
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3235
                                              ISD::CondCode CC,
3236
196
                                              int64_t RHSValue, SDLoc dl) {
3237
196
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3238
196
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3239
0
    return SDValue();
3240
196
  bool IsRHSZero = RHSValue == 0;
3241
196
  bool IsRHSOne = RHSValue == 1;
3242
196
  bool IsRHSNegOne = RHSValue == -1LL;
3243
196
  switch (CC) {
3244
196
  
default: return SDValue()0
;
3245
196
  case ISD::SETEQ: {
3246
62
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3247
62
    // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
3248
62
    SDValue Xor = IsRHSZero ? 
LHS28
:
3249
62
      
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0)34
;
3250
62
    SDValue Clz =
3251
62
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3252
62
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3253
62
                                          S->getI64Imm(58, dl),
3254
62
                                          S->getI64Imm(63, dl)), 0);
3255
196
  }
3256
196
  case ISD::SETNE: {
3257
54
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3258
54
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3259
54
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3260
54
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3261
54
    SDValue Xor = IsRHSZero ? 
LHS24
:
3262
54
      
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0)30
;
3263
54
    SDValue AC =
3264
54
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3265
54
                                     Xor, S->getI32Imm(~0U, dl)), 0);
3266
54
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3267
54
                                          Xor, AC.getValue(1)), 0);
3268
196
  }
3269
196
  case ISD::SETGE: {
3270
8
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3271
8
    // (zext (setcc %a, %b, setge)) ->
3272
8
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3273
8
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3274
8
    if (IsRHSZero)
3275
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3276
8
    std::swap(LHS, RHS);
3277
8
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3278
8
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3279
8
    LLVM_FALLTHROUGH;
3280
8
  }
3281
16
  case ISD::SETLE: {
3282
16
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3283
16
    // (zext (setcc %a, %b, setge)) ->
3284
16
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3285
16
    // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3286
16
    if (IsRHSZero)
3287
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3288
16
    SDValue ShiftL =
3289
16
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3290
16
                                     S->getI64Imm(1, dl),
3291
16
                                     S->getI64Imm(63, dl)), 0);
3292
16
    SDValue ShiftR =
3293
16
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3294
16
                                     S->getI64Imm(63, dl)), 0);
3295
16
    SDValue SubtractCarry =
3296
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3297
16
                                     LHS, RHS), 1);
3298
16
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3299
16
                                          ShiftR, ShiftL, SubtractCarry), 0);
3300
16
  }
3301
21
  case ISD::SETGT: {
3302
21
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3303
21
    // (zext (setcc %a, %b, setgt)) ->
3304
21
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3305
21
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3306
21
    if (IsRHSNegOne)
3307
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3308
21
    if (IsRHSZero) {
3309
10
      SDValue Addi =
3310
10
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3311
10
                                       S->getI64Imm(~0ULL, dl)), 0);
3312
10
      SDValue Nor =
3313
10
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3314
10
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3315
10
                                            S->getI64Imm(1, dl),
3316
10
                                            S->getI64Imm(63, dl)), 0);
3317
10
    }
3318
11
    std::swap(LHS, RHS);
3319
11
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3320
11
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3321
11
    IsRHSOne = RHSConst && 
RHSConst->getSExtValue() == 10
;
3322
11
    LLVM_FALLTHROUGH;
3323
11
  }
3324
29
  case ISD::SETLT: {
3325
29
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3326
29
    // (zext (setcc %a, %b, setlt)) ->
3327
29
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3328
29
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3329
29
    if (IsRHSOne)
3330
8
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3331
21
    if (IsRHSZero)
3332
0
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3333
0
                                            S->getI64Imm(1, dl),
3334
0
                                            S->getI64Imm(63, dl)), 0);
3335
21
    SDValue SRADINode =
3336
21
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3337
21
                                     LHS, S->getI64Imm(63, dl)), 0);
3338
21
    SDValue SRDINode =
3339
21
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3340
21
                                     RHS, S->getI64Imm(1, dl),
3341
21
                                     S->getI64Imm(63, dl)), 0);
3342
21
    SDValue SUBFC8Carry =
3343
21
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3344
21
                                     RHS, LHS), 1);
3345
21
    SDValue ADDE8Node =
3346
21
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3347
21
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
3348
21
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3349
21
                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
3350
21
  }
3351
21
  case ISD::SETUGE:
3352
8
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3353
8
    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3354
8
    std::swap(LHS, RHS);
3355
8
    LLVM_FALLTHROUGH;
3356
16
  case ISD::SETULE: {
3357
16
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3358
16
    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3359
16
    SDValue SUBFC8Carry =
3360
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3361
16
                                     LHS, RHS), 1);
3362
16
    SDValue SUBFE8Node =
3363
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3364
16
                                     LHS, LHS, SUBFC8Carry), 0);
3365
16
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3366
16
                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
3367
8
  }
3368
8
  case ISD::SETUGT:
3369
2
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3370
2
    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3371
2
    std::swap(LHS, RHS);
3372
2
    LLVM_FALLTHROUGH;
3373
9
  case ISD::SETULT: {
3374
9
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3375
9
    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3376
9
    SDValue SubtractCarry =
3377
9
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3378
9
                                     RHS, LHS), 1);
3379
9
    SDValue ExtSub =
3380
9
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3381
9
                                     LHS, LHS, SubtractCarry), 0);
3382
9
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3383
9
                                          ExtSub), 0);
3384
2
  }
3385
196
  }
3386
196
}
3387
3388
/// Produces a sign-extended result of comparing two 64-bit values according to
3389
/// the passed condition code.
3390
SDValue
3391
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3392
                                              ISD::CondCode CC,
3393
145
                                              int64_t RHSValue, SDLoc dl) {
3394
145
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3395
145
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3396
0
    return SDValue();
3397
145
  bool IsRHSZero = RHSValue == 0;
3398
145
  bool IsRHSOne = RHSValue == 1;
3399
145
  bool IsRHSNegOne = RHSValue == -1LL;
3400
145
  switch (CC) {
3401
145
  
default: return SDValue()0
;
3402
145
  case ISD::SETEQ: {
3403
40
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3404
40
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3405
40
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3406
40
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3407
40
    SDValue AddInput = IsRHSZero ? 
LHS24
:
3408
40
      
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0)16
;
3409
40
    SDValue Addic =
3410
40
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3411
40
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
3412
40
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3413
40
                                          Addic, Addic.getValue(1)), 0);
3414
145
  }
3415
145
  case ISD::SETNE: {
3416
37
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3417
37
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3418
37
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3419
37
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3420
37
    SDValue Xor = IsRHSZero ? 
LHS16
:
3421
37
      
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0)21
;
3422
37
    SDValue SC =
3423
37
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3424
37
                                     Xor, S->getI32Imm(0, dl)), 0);
3425
37
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3426
37
                                          SC, SC.getValue(1)), 0);
3427
145
  }
3428
145
  case ISD::SETGE: {
3429
8
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3430
8
    // (zext (setcc %a, %b, setge)) ->
3431
8
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3432
8
    // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3433
8
    if (IsRHSZero)
3434
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3435
8
    std::swap(LHS, RHS);
3436
8
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3437
8
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3438
8
    LLVM_FALLTHROUGH;
3439
8
  }
3440
16
  case ISD::SETLE: {
3441
16
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3442
16
    // (zext (setcc %a, %b, setge)) ->
3443
16
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3444
16
    // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3445
16
    if (IsRHSZero)
3446
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3447
16
    SDValue ShiftR =
3448
16
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3449
16
                                     S->getI64Imm(63, dl)), 0);
3450
16
    SDValue ShiftL =
3451
16
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3452
16
                                     S->getI64Imm(1, dl),
3453
16
                                     S->getI64Imm(63, dl)), 0);
3454
16
    SDValue SubtractCarry =
3455
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3456
16
                                     LHS, RHS), 1);
3457
16
    SDValue Adde =
3458
16
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3459
16
                                     ShiftR, ShiftL, SubtractCarry), 0);
3460
16
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3461
16
  }
3462
18
  case ISD::SETGT: {
3463
18
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3464
18
    // (zext (setcc %a, %b, setgt)) ->
3465
18
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3466
18
    // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3467
18
    if (IsRHSNegOne)
3468
0
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3469
18
    if (IsRHSZero) {
3470
8
      SDValue Add =
3471
8
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3472
8
                                       S->getI64Imm(-1, dl)), 0);
3473
8
      SDValue Nor =
3474
8
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3475
8
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3476
8
                                            S->getI64Imm(63, dl)), 0);
3477
8
    }
3478
10
    std::swap(LHS, RHS);
3479
10
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3480
10
    IsRHSZero = RHSConst && 
RHSConst->isNullValue()0
;
3481
10
    IsRHSOne = RHSConst && 
RHSConst->getSExtValue() == 10
;
3482
10
    LLVM_FALLTHROUGH;
3483
10
  }
3484
28
  case ISD::SETLT: {
3485
28
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3486
28
    // (zext (setcc %a, %b, setlt)) ->
3487
28
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3488
28
    // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3489
28
    if (IsRHSOne)
3490
8
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3491
20
    if (IsRHSZero) {
3492
0
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3493
0
                                            S->getI64Imm(63, dl)), 0);
3494
0
    }
3495
20
    SDValue SRADINode =
3496
20
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3497
20
                                     LHS, S->getI64Imm(63, dl)), 0);
3498
20
    SDValue SRDINode =
3499
20
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3500
20
                                     RHS, S->getI64Imm(1, dl),
3501
20
                                     S->getI64Imm(63, dl)), 0);
3502
20
    SDValue SUBFC8Carry =
3503
20
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3504
20
                                     RHS, LHS), 1);
3505
20
    SDValue ADDE8Node =
3506
20
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3507
20
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
3508
20
    SDValue XORI8Node =
3509
20
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3510
20
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
3511
20
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3512
20
                                          XORI8Node), 0);
3513
20
  }
3514
20
  case ISD::SETUGE:
3515
8
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3516
8
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3517
8
    std::swap(LHS, RHS);
3518
8
    LLVM_FALLTHROUGH;
3519
16
  case ISD::SETULE: {
3520
16
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3521
16
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3522
16
    SDValue SubtractCarry =
3523
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3524
16
                                     LHS, RHS), 1);
3525
16
    SDValue ExtSub =
3526
16
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3527
16
                                     LHS, SubtractCarry), 0);
3528
16
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3529
16
                                          ExtSub, ExtSub), 0);
3530
8
  }
3531
8
  case ISD::SETUGT:
3532
0
    // {subc.reg, subc.CA} = (subcarry %b, %a)
3533
0
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3534
0
    std::swap(LHS, RHS);
3535
0
    LLVM_FALLTHROUGH;
3536
0
  case ISD::SETULT: {
3537
0
    // {subc.reg, subc.CA} = (subcarry %a, %b)
3538
0
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3539
0
    SDValue SubCarry =
3540
0
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3541
0
                                     RHS, LHS), 1);
3542
0
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3543
0
                                     LHS, LHS, SubCarry), 0);
3544
0
  }
3545
145
  }
3546
145
}
3547
3548
/// Do all uses of this SDValue need the result in a GPR?
3549
/// This is meant to be used on values that have type i1 since
3550
/// it is somewhat meaningless to ask if values of other types
3551
/// should be kept in GPR's.
3552
1.15k
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3553
1.15k
  assert(Compare.getOpcode() == ISD::SETCC &&
3554
1.15k
         "An ISD::SETCC node required here.");
3555
1.15k
3556
1.15k
  // For values that have a single use, the caller should obviously already have
3557
1.15k
  // checked if that use is an extending use. We check the other uses here.
3558
1.15k
  if (Compare.hasOneUse())
3559
1.15k
    return true;
3560
1
  // We want the value in a GPR if it is being extended, used for a select, or
3561
1
  // used in logical operations.
3562
1
  for (auto CompareUse : Compare.getNode()->uses())
3563
2
    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3564
2
        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3565
2
        
CompareUse->getOpcode() != ISD::SELECT1
&&
3566
2
        
!isLogicOp(CompareUse->getOpcode())1
) {
3567
1
      OmittedForNonExtendUses++;
3568
1
      return false;
3569
1
    }
3570
1
  
return true0
;
3571
1
}
3572
3573
/// Returns an equivalent of a SETCC node but with the result the same width as
3574
/// the inputs. This can also be used for SELECT_CC if either the true or false
3575
/// values is a power of two while the other is zero.
3576
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3577
1.15k
                                                SetccInGPROpts ConvOpts) {
3578
1.15k
  assert((Compare.getOpcode() == ISD::SETCC ||
3579
1.15k
          Compare.getOpcode() == ISD::SELECT_CC) &&
3580
1.15k
         "An ISD::SETCC node required here.");
3581
1.15k
3582
1.15k
  // Don't convert this comparison to a GPR sequence because there are uses
3583
1.15k
  // of the i1 result (i.e. uses that require the result in the CR).
3584
1.15k
  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3585
1
    return SDValue();
3586
1.15k
3587
1.15k
  SDValue LHS = Compare.getOperand(0);
3588
1.15k
  SDValue RHS = Compare.getOperand(1);
3589
1.15k
3590
1.15k
  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3591
1.15k
  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 
40
: 2;
3592
1.15k
  ISD::CondCode CC =
3593
1.15k
    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3594
1.15k
  EVT InputVT = LHS.getValueType();
3595
1.15k
  if (InputVT != MVT::i32 && 
InputVT != MVT::i64393
)
3596
52
    return SDValue();
3597
1.10k
3598
1.10k
  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3599
1.10k
      ConvOpts == SetccInGPROpts::SExtInvert)
3600
0
    CC = ISD::getSetCCInverse(CC, true);
3601
1.10k
3602
1.10k
  bool Inputs32Bit = InputVT == MVT::i32;
3603
1.10k
3604
1.10k
  SDLoc dl(Compare);
3605
1.10k
  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3606
1.10k
  int64_t RHSValue = RHSConst ? 
RHSConst->getSExtValue()489
: INT64_MAX;
3607
1.10k
  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3608
1.10k
    
ConvOpts == SetccInGPROpts::SExtInvert574
;
3609
1.10k
3610
1.10k
  if (IsSext && 
Inputs32Bit530
)
3611
385
    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3612
719
  else if (Inputs32Bit)
3613
378
    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3614
341
  else if (IsSext)
3615
145
    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3616
196
  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3617
196
}
3618
3619
} // end anonymous namespace
3620
3621
198k
bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3622
198k
  if (N->getValueType(0) != MVT::i32 &&
3623
198k
      
N->getValueType(0) != MVT::i64174k
)
3624
118k
    return false;
3625
80.1k
3626
80.1k
  // This optimization will emit code that assumes 64-bit registers
3627
80.1k
  // so we don't want to run it in 32-bit mode. Also don't run it
3628
80.1k
  // on functions that are not to be optimized.
3629
80.1k
  if (TM.getOptLevel() == CodeGenOpt::None || 
!TM.isPPC64()76.3k
)
3630
12.8k
    return false;
3631
67.3k
3632
67.3k
  switch (N->getOpcode()) {
3633
67.3k
  
default: break64.8k
;
3634
67.3k
  case ISD::ZERO_EXTEND:
3635
2.49k
  case ISD::SIGN_EXTEND:
3636
2.49k
  case ISD::AND:
3637
2.49k
  case ISD::OR:
3638
2.49k
  case ISD::XOR: {
3639
2.49k
    IntegerCompareEliminator ICmpElim(CurDAG, this);
3640
2.49k
    if (SDNode *New = ICmpElim.Select(N)) {
3641
1.09k
      ReplaceNode(N, New);
3642
1.09k
      return true;
3643
1.09k
    }
3644
66.2k
  }
3645
66.2k
  }
3646
66.2k
  return false;
3647
66.2k
}
3648
3649
199k
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3650
199k
  if (N->getValueType(0) != MVT::i32 &&
3651
199k
      
N->getValueType(0) != MVT::i64175k
)
3652
118k
    return false;
3653
81.1k
3654
81.1k
  if (!UseBitPermRewriter)
3655
0
    return false;
3656
81.1k
3657
81.1k
  switch (N->getOpcode()) {
3658
81.1k
  
default: break79.1k
;
3659
81.1k
  case ISD::ROTL:
3660
1.94k
  case ISD::SHL:
3661
1.94k
  case ISD::SRL:
3662
1.94k
  case ISD::AND:
3663
1.94k
  case ISD::OR: {
3664
1.94k
    BitPermutationSelector BPS(CurDAG);
3665
1.94k
    if (SDNode *New = BPS.Select(N)) {
3666
957
      ReplaceNode(N, New);
3667
957
      return true;
3668
957
    }
3669
990
    return false;
3670
990
  }
3671
79.1k
  }
3672
79.1k
3673
79.1k
  return false;
3674
79.1k
}
3675
3676
/// SelectCC - Select a comparison of the specified values with the specified
3677
/// condition code, returning the CR# of the expression.
3678
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3679
1.34k
                                  const SDLoc &dl) {
3680
1.34k
  // Always select the LHS.
3681
1.34k
  unsigned Opc;
3682
1.34k
3683
1.34k
  if (LHS.getValueType() == MVT::i32) {
3684
805
    unsigned Imm;
3685
805
    if (CC == ISD::SETEQ || 
CC == ISD::SETNE555
) {
3686
477
      if (isInt32Immediate(RHS, Imm)) {
3687
423
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3688
423
        if (isUInt<16>(Imm))
3689
401
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3690
401
                                                getI32Imm(Imm & 0xFFFF, dl)),
3691
401
                         0);
3692
22
        // If this is a 16-bit signed immediate, fold it.
3693
22
        if (isInt<16>((int)Imm))
3694
15
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3695
15
                                                getI32Imm(Imm & 0xFFFF, dl)),
3696
15
                         0);
3697
7
3698
7
        // For non-equality comparisons, the default code would materialize the
3699
7
        // constant, then compare against it, like this:
3700
7
        //   lis r2, 4660
3701
7
        //   ori r2, r2, 22136
3702
7
        //   cmpw cr0, r3, r2
3703
7
        // Since we are just comparing for equality, we can emit this instead:
3704
7
        //   xoris r0,r3,0x1234
3705
7
        //   cmplwi cr0,r0,0x5678
3706
7
        //   beq cr0,L6
3707
7
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3708
7
                                           getI32Imm(Imm >> 16, dl)), 0);
3709
7
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3710
7
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
3711
7
      }
3712
54
      Opc = PPC::CMPLW;
3713
328
    } else if (ISD::isUnsignedIntSetCC(CC)) {
3714
84
      if (isInt32Immediate(RHS, Imm) && 
isUInt<16>(Imm)49
)
3715
46
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3716
46
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
3717
38
      Opc = PPC::CMPLW;
3718
244
    } else {
3719
244
      int16_t SImm;
3720
244
      if (isIntS16Immediate(RHS, SImm))
3721
151
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3722
151
                                              getI32Imm((int)SImm & 0xFFFF,
3723
151
                                                        dl)),
3724
151
                         0);
3725
93
      Opc = PPC::CMPW;
3726
93
    }
3727
805
  } else 
if (536
LHS.getValueType() == MVT::i64536
) {
3728
387
    uint64_t Imm;
3729
387
    if (CC == ISD::SETEQ || 
CC == ISD::SETNE298
) {
3730
197
      if (isInt64Immediate(RHS.getNode(), Imm)) {
3731
154
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3732
154
        if (isUInt<16>(Imm))
3733
142
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3734
142
                                                getI32Imm(Imm & 0xFFFF, dl)),
3735
142
                         0);
3736
12
        // If this is a 16-bit signed immediate, fold it.
3737
12
        if (isInt<16>(Imm))
3738
9
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3739
9
                                                getI32Imm(Imm & 0xFFFF, dl)),
3740
9
                         0);
3741
3
3742
3
        // For non-equality comparisons, the default code would materialize the
3743
3
        // constant, then compare against it, like this:
3744
3
        //   lis r2, 4660
3745
3
        //   ori r2, r2, 22136
3746
3
        //   cmpd cr0, r3, r2
3747
3
        // Since we are just comparing for equality, we can emit this instead:
3748
3
        //   xoris r0,r3,0x1234
3749
3
        //   cmpldi cr0,r0,0x5678
3750
3
        //   beq cr0,L6
3751
3
        if (isUInt<32>(Imm)) {
3752
1
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
3753
1
                                             getI64Imm(Imm >> 16, dl)), 0);
3754
1
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
3755
1
                                                getI64Imm(Imm & 0xFFFF, dl)),
3756
1
                         0);
3757
1
        }
3758
45
      }
3759
45
      Opc = PPC::CMPLD;
3760
190
    } else if (ISD::isUnsignedIntSetCC(CC)) {
3761
63
      if (isInt64Immediate(RHS.getNode(), Imm) && 
isUInt<16>(Imm)23
)
3762
21
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3763
21
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
3764
42
      Opc = PPC::CMPLD;
3765
127
    } else {
3766
127
      int16_t SImm;
3767
127
      if (isIntS16Immediate(RHS, SImm))
3768
35
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3769
35
                                              getI64Imm(SImm & 0xFFFF, dl)),
3770
35
                         0);
3771
92
      Opc = PPC::CMPD;
3772
92
    }
3773
387
  } else 
if (149
LHS.getValueType() == MVT::f32149
) {
3774
87
    if (PPCSubTarget->hasSPE()) {
3775
3
      switch (CC) {
3776
3
        default:
3777
3
        case ISD::SETEQ:
3778
3
        case ISD::SETNE:
3779
3
          Opc = PPC::EFSCMPEQ;
3780
3
          break;
3781
3
        case ISD::SETLT:
3782
0
        case ISD::SETGE:
3783
0
        case ISD::SETOLT:
3784
0
        case ISD::SETOGE:
3785
0
        case ISD::SETULT:
3786
0
        case ISD::SETUGE:
3787
0
          Opc = PPC::EFSCMPLT;
3788
0
          break;
3789
0
        case ISD::SETGT:
3790
0
        case ISD::SETLE:
3791
0
        case ISD::SETOGT:
3792
0
        case ISD::SETOLE:
3793
0
        case ISD::SETUGT:
3794
0
        case ISD::SETULE:
3795
0
          Opc = PPC::EFSCMPGT;
3796
0
          break;
3797
84
      }
3798
84
    } else
3799
84
      Opc = PPC::FCMPUS;
3800
87
  } else 
if (62
LHS.getValueType() == MVT::f6462
) {
3801
58
    if (PPCSubTarget->hasSPE()) {
3802
3
      switch (CC) {
3803
3
        default:
3804
3
        case ISD::SETEQ:
3805
3
        case ISD::SETNE:
3806
3
          Opc = PPC::EFDCMPEQ;
3807
3
          break;
3808
3
        case ISD::SETLT:
3809
0
        case ISD::SETGE:
3810
0
        case ISD::SETOLT:
3811
0
        case ISD::SETOGE:
3812
0
        case ISD::SETULT:
3813
0
        case ISD::SETUGE:
3814
0
          Opc = PPC::EFDCMPLT;
3815
0
          break;
3816
0
        case ISD::SETGT:
3817
0
        case ISD::SETLE:
3818
0
        case ISD::SETOGT:
3819
0
        case ISD::SETOLE:
3820
0
        case ISD::SETUGT:
3821
0
        case ISD::SETULE:
3822
0
          Opc = PPC::EFDCMPGT;
3823
0
          break;
3824
55
      }
3825
55
    } else
3826
55
      Opc = PPCSubTarget->hasVSX() ? 
PPC::XSCMPUDP29
:
PPC::FCMPUD26
;
3827
58
  } else {
3828
4
    assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
3829
4
    assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
3830
4
    Opc = PPC::XSCMPUQP;
3831
4
  }
3832
1.34k
  
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0)513
;
3833
1.34k
}
3834
3835
1.51k
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
3836
1.51k
  switch (CC) {
3837
1.51k
  case ISD::SETUEQ:
3838
0
  case ISD::SETONE:
3839
0
  case ISD::SETOLE:
3840
0
  case ISD::SETOGE:
3841
0
    llvm_unreachable("Should be lowered by legalize!");
3842
0
  default: llvm_unreachable("Unknown condition!");
3843
379
  case ISD::SETOEQ:
3844
379
  case ISD::SETEQ:  return PPC::PRED_EQ;
3845
410
  case ISD::SETUNE:
3846
410
  case ISD::SETNE:  return PPC::PRED_NE;
3847
410
  case ISD::SETOLT:
3848
187
  case ISD::SETLT:  return PPC::PRED_LT;
3849
187
  case ISD::SETULE:
3850
63
  case ISD::SETLE:  return PPC::PRED_LE;
3851
233
  case ISD::SETOGT:
3852
233
  case ISD::SETGT:  return PPC::PRED_GT;
3853
233
  case ISD::SETUGE:
3854
97
  case ISD::SETGE:  return PPC::PRED_GE;
3855
97
  
case ISD::SETO: return PPC::PRED_NU4
;
3856
97
  
case ISD::SETUO: return PPC::PRED_UN0
;
3857
97
    // These two are invalid for floating point.  Assume we have int.
3858
97
  
case ISD::SETULT: return PPC::PRED_LT73
;
3859
97
  
case ISD::SETUGT: return PPC::PRED_GT70
;
3860
1.51k
  }
3861
1.51k
}
3862
3863
/// getCRIdxForSetCC - Return the index of the condition register field
3864
/// associated with the SetCC condition, and whether or not the field is
3865
/// treated as inverted.  That is, lt = 0; ge = 0 inverted.
3866
21
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3867
21
  Invert = false;
3868
21
  switch (CC) {
3869
21
  
default: 0
llvm_unreachable0
("Unknown condition!");
3870
21
  case ISD::SETOLT:
3871
1
  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
3872
1
  case ISD::SETOGT:
3873
1
  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
3874
9
  case ISD::SETOEQ:
3875
9
  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
3876
9
  
case ISD::SETUO: return 33
; // Bit #3 = SETUO
3877
9
  case ISD::SETUGE:
3878
1
  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
3879
1
  case ISD::SETULE:
3880
1
  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
3881
1
  case ISD::SETUNE:
3882
1
  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
3883
3
  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
3884
1
  case ISD::SETUEQ:
3885
0
  case ISD::SETOGE:
3886
0
  case ISD::SETOLE:
3887
0
  case ISD::SETONE:
3888
0
    llvm_unreachable("Invalid branch code: should be expanded by legalize");
3889
0
  // These are invalid for floating point.  Assume integer.
3890
1
  case ISD::SETULT: return 0;
3891
0
  case ISD::SETUGT: return 1;
3892
21
  }
3893
21
}
3894
3895
// getVCmpInst: return the vector compare instruction for the specified
3896
// vector type and condition code. Since this is for altivec specific code,
3897
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3898
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3899
396
                                bool HasVSX, bool &Swap, bool &Negate) {
3900
396
  Swap = false;
3901
396
  Negate = false;
3902
396
3903
396
  if (VecVT.isFloatingPoint()) {
3904
49
    /* Handle some cases by swapping input operands.  */
3905
49
    switch (CC) {
3906
49
      
case ISD::SETLE: CC = ISD::SETGE; Swap = true; break0
;
3907
49
      
case ISD::SETLT: CC = ISD::SETGT; Swap = true; break0
;
3908
49
      
case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break1
;
3909
49
      
case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break1
;
3910
49
      
case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break1
;
3911
49
      
case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break1
;
3912
49
      
default: break45
;
3913
49
    }
3914
49
    /* Handle some cases by negating the result.  */
3915
49
    switch (CC) {
3916
49
      
case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break1
;
3917
49
      
case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break11
;
3918
49
      
case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break2
;
3919
49
      
case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break2
;
3920
49
      
default: break33
;
3921
49
    }
3922
49
    /* We have instructions implementing the remaining cases.  */
3923
49
    switch (CC) {
3924
49
      case ISD::SETEQ:
3925
41
      case ISD::SETOEQ:
3926
41
        if (VecVT == MVT::v4f32)
3927
31
          return HasVSX ? 
PPC::XVCMPEQSP23
:
PPC::VCMPEQFP8
;
3928
10
        else if (VecVT == MVT::v2f64)
3929
10
          return PPC::XVCMPEQDP;
3930
0
        break;
3931
4
      case ISD::SETGT:
3932
4
      case ISD::SETOGT:
3933
4
        if (VecVT == MVT::v4f32)
3934
4
          return HasVSX ? 
PPC::XVCMPGTSP0
: PPC::VCMPGTFP;
3935
0
        else if (VecVT == MVT::v2f64)
3936
0
          return PPC::XVCMPGTDP;
3937
0
        break;
3938
4
      case ISD::SETGE:
3939
4
      case ISD::SETOGE:
3940
4
        if (VecVT == MVT::v4f32)
3941
4
          return HasVSX ? 
PPC::XVCMPGESP0
: PPC::VCMPGEFP;
3942
0
        else if (VecVT == MVT::v2f64)
3943
0
          return PPC::XVCMPGEDP;
3944
0
        break;
3945
0
      default:
3946
0
        break;
3947
0
    }
3948
0
    llvm_unreachable("Invalid floating-point vector compare condition");
3949
347
  } else {
3950
347
    /* Handle some cases by swapping input operands.  */
3951
347
    switch (CC) {
3952
347
      
case ISD::SETGE: CC = ISD::SETLE; Swap = true; break3
;
3953
347
      
case ISD::SETLT: CC = ISD::SETGT; Swap = true; break5
;
3954
347
      
case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break9
;
3955
347
      
case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break10
;
3956
347
      
default: break320
;
3957
347
    }
3958
347
    /* Handle some cases by negating the result.  */
3959
347
    switch (CC) {
3960
347
      
case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break11
;
3961
347
      
case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break0
;
3962
347
      
case ISD::SETLE: CC = ISD::SETGT; Negate = true; break6
;
3963
347
      
case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break18
;
3964
347
      
default: break312
;
3965
347
    }
3966
347
    /* We have instructions implementing the remaining cases.  */
3967
347
    switch (CC) {
3968
347
      case ISD::SETEQ:
3969
145
      case ISD::SETUEQ:
3970
145
        if (VecVT == MVT::v16i8)
3971
15
          return PPC::VCMPEQUB;
3972
130
        else if (VecVT == MVT::v8i16)
3973
18
          return PPC::VCMPEQUH;
3974
112
        else if (VecVT == MVT::v4i32)
3975
44
          return PPC::VCMPEQUW;
3976
68
        else if (VecVT == MVT::v2i64)
3977
68
          return PPC::VCMPEQUD;
3978
0
        break;
3979
80
      case ISD::SETGT:
3980
80
        if (VecVT == MVT::v16i8)
3981
4
          return PPC::VCMPGTSB;
3982
76
        else if (VecVT == MVT::v8i16)
3983
4
          return PPC::VCMPGTSH;
3984
72
        else if (VecVT == MVT::v4i32)
3985
10
          return PPC::VCMPGTSW;
3986
62
        else if (VecVT == MVT::v2i64)
3987
62
          return PPC::VCMPGTSD;
3988
0
        break;
3989
122
      case ISD::SETUGT:
3990
122
        if (VecVT == MVT::v16i8)
3991
16
          return PPC::VCMPGTUB;
3992
106
        else if (VecVT == MVT::v8i16)
3993
16
          return PPC::VCMPGTUH;
3994
90
        else if (VecVT == MVT::v4i32)
3995
20
          return PPC::VCMPGTUW;
3996
70
        else if (VecVT == MVT::v2i64)
3997
70
          return PPC::VCMPGTUD;
3998
0
        break;
3999
0
      default:
4000
0
        break;
4001
0
    }
4002
0
    llvm_unreachable("Invalid integer vector compare condition");
4003
0
  }
4004
396
}
4005
4006
1.27k
/// Try to hand-select the ISD::SETCC node \p N.
///
/// Three strategies are attempted, in this order:
///  1. If CR bits are not in use and operand 1 is a 32-bit immediate equal to
///     0 or -1, emit a short GPR-only sequence for the EQ/NE/LT/GT conditions
///     (some of them only when the pointer type is not i64).
///  2. If the operands are vectors, emit the compare instruction picked by
///     getVCmpInst(), swapping the operands and/or inverting the result with
///     a NOR as that helper requests. Nothing is selected here when the
///     subtarget has QPX or SPE.
///  3. Otherwise, if CR bits are not in use, compare through SelectCC(), copy
///     the result into CR7, read it back with MFOCRF and extract (and, when
///     required, invert) the relevant bit.
///
/// \returns true if \p N has been selected here; false if the caller must
/// select it some other way.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc Loc(N);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PointerVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  const bool Is64Bit = (PointerVT == MVT::i64);

  // We can codegen setcc op, imm very efficiently compared to a brcond.
  // Check for those cases here.
  unsigned ImmVal;
  if (!PPCSubTarget->useCRBits() &&
      isInt32Immediate(N->getOperand(1), ImmVal)) {
    SDValue Src = N->getOperand(0);

    // Morph N into RLWINM(V, 1, 31, 31).
    auto SelectRotate1Mask31 = [&](SDValue V) {
      SDValue RotOps[] = {V, getI32Imm(1, Loc), getI32Imm(31, Loc),
                          getI32Imm(31, Loc)};
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, RotOps);
    };

    if (ImmVal == 0) {
      // setcc op, 0
      switch (CC) {
      default:
        break;
      case ISD::SETEQ: {
        SDValue LeadingZeros(
            CurDAG->getMachineNode(PPC::CNTLZW, Loc, MVT::i32, Src), 0);
        SDValue ShiftOps[] = {LeadingZeros, getI32Imm(27, Loc),
                              getI32Imm(5, Loc), getI32Imm(31, Loc)};
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, ShiftOps);
        return true;
      }
      case ISD::SETNE: {
        if (Is64Bit)
          break;
        SDNode *AddM1 =
            CurDAG->getMachineNode(PPC::ADDIC, Loc, MVT::i32, MVT::Glue, Src,
                                   getI32Imm(~0U, Loc));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AddM1, 0), Src,
                             SDValue(AddM1, 1));
        return true;
      }
      case ISD::SETLT:
        SelectRotate1Mask31(Src);
        return true;
      case ISD::SETGT: {
        SDValue Negated(CurDAG->getMachineNode(PPC::NEG, Loc, MVT::i32, Src),
                        0);
        SDValue Masked(
            CurDAG->getMachineNode(PPC::ANDC, Loc, MVT::i32, Negated, Src), 0);
        SelectRotate1Mask31(Masked);
        return true;
      }
      }
    } else if (ImmVal == ~0U) {
      // setcc op, -1
      switch (CC) {
      default:
        break;
      case ISD::SETEQ: {
        if (Is64Bit)
          break;
        SDNode *AddP1 =
            CurDAG->getMachineNode(PPC::ADDIC, Loc, MVT::i32, MVT::Glue, Src,
                                   getI32Imm(1, Loc));
        SDValue Zero(
            CurDAG->getMachineNode(PPC::LI, Loc, MVT::i32, getI32Imm(0, Loc)),
            0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, Zero,
                             SDValue(AddP1, 1));
        return true;
      }
      case ISD::SETNE: {
        if (Is64Bit)
          break;
        SDValue Inverted(
            CurDAG->getMachineNode(PPC::NOR, Loc, MVT::i32, Src, Src), 0);
        SDNode *AddM1 =
            CurDAG->getMachineNode(PPC::ADDIC, Loc, MVT::i32, MVT::Glue,
                                   Inverted, getI32Imm(~0U, Loc));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AddM1, 0),
                             Inverted, SDValue(AddM1, 1));
        return true;
      }
      case ISD::SETLT: {
        SDValue PlusOne(CurDAG->getMachineNode(PPC::ADDI, Loc, MVT::i32, Src,
                                               getI32Imm(1, Loc)),
                        0);
        SDValue Combined(
            CurDAG->getMachineNode(PPC::AND, Loc, MVT::i32, PlusOne, Src), 0);
        SelectRotate1Mask31(Combined);
        return true;
      }
      case ISD::SETGT: {
        SDValue RotOps[] = {Src, getI32Imm(1, Loc), getI32Imm(31, Loc),
                            getI32Imm(31, Loc)};
        SDValue Rotated(
            CurDAG->getMachineNode(PPC::RLWINM, Loc, MVT::i32, RotOps), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Rotated,
                             getI32Imm(1, Loc));
        return true;
      }
      }
    }
  }

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Altivec vector compare instructions do not set any CR register by default,
  // and vector compare operations return the same type as their operands.
  if (LHS.getValueType().isVector()) {
    if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    const bool HasVSX = PPCSubTarget->hasVSX();
    bool Swap, Negate;
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, HasVSX, Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (!Negate) {
      CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
      return true;
    }

    // The helper asked for the inverse: NOR the compare result with itself.
    SDValue VCmp(CurDAG->getMachineNode(VCmpInst, Loc, ResVT, LHS, RHS), 0);
    const unsigned NorOpc = HasVSX ? PPC::XXLNOR : PPC::VNOR;
    CurDAG->SelectNodeTo(N, NorOpc, ResVT, VCmp, VCmp);
    return true;
  }

  if (PPCSubTarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, Loc);

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that.
  // The correct compare instruction is already set by SelectCC().
  if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint())
    Idx = 1;

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
  SDValue NoGlue(nullptr, 0); // Null incoming flag value.
  CCReg = CurDAG
              ->getCopyToReg(CurDAG->getEntryNode(), Loc, CR7Reg, CCReg,
                             NoGlue)
              .getValue(1);

  SDValue IntCR(
      CurDAG->getMachineNode(PPC::MFOCRF, Loc, MVT::i32, CR7Reg, CCReg), 0);

  SDValue BitOps[] = {IntCR, getI32Imm((32 - (3 - Idx)) & 31, Loc),
                      getI32Imm(31, Loc), getI32Imm(31, Loc)};
  if (Inv) {
    // Get the specified bit, then flip it.
    SDValue Bit(CurDAG->getMachineNode(PPC::RLWINM, Loc, MVT::i32, BitOps), 0);
    CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Bit, getI32Imm(1, Loc));
    return true;
  }

  CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, BitOps);
  return true;
}
4163
4164
/// Does this node represent a load/store node whose address can be represented
4165
/// with a register plus an immediate that's a multiple of \p Val:
4166
5.10k
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4167
5.10k
  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4168
5.10k
  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4169
5.10k
  SDValue AddrOp;
4170
5.10k
  if (LDN)
4171
3.29k
    AddrOp = LDN->getOperand(1);
4172
1.80k
  else if (STN)
4173
1.80k
    AddrOp = STN->getOperand(2);
4174
5.10k
4175
5.10k
  // If the address points a frame object or a frame object with an offset,
4176
5.10k
  // we need to check the object alignment.
4177
5.10k
  short Imm = 0;
4178
5.10k
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4179
488
          AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4180
488
                                           AddrOp)) {
4181
488
    // If op0 is a frame index that is under aligned, we can't do it either,
4182
488
    // because it is translated to r31 or r1 + slot + offset. We won't know the
4183
488
    // slot number until the stack frame is finalized.
4184
488
    const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4185
488
    unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
4186
488
    if ((SlotAlign % Val) != 0)
4187
10
      return false;
4188
478
4189
478
    // If we have an offset, we need further check on the offset.
4190
478
    if (AddrOp.getOpcode() != ISD::ADD)
4191
463
      return true;
4192
4.62k
  }
4193
4.62k
4194
4.62k
  if (AddrOp.getOpcode() == ISD::ADD)
4195
1.46k
    return isIntS16Immediate(AddrOp.getOperand(1), Imm) && 
!(Imm % Val)1.40k
;
4196
3.16k
4197
3.16k
  // If the address comes from the outside, the offset will be zero.
4198
3.16k
  return AddrOp.getOpcode() == ISD::CopyFromReg;
4199
3.16k
}
4200
4201
1.32k
/// Attach the memory operand of \p N to the machine node \p Result.
///
/// \p N is cast to MemSDNode and \p Result to MachineSDNode, so both must be
/// of those kinds. \p Result's memory references are set to the single memory
/// operand read from \p N.
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *SourceMemOp = cast<MemSDNode>(N)->getMemOperand();
  auto *SelectedNode = cast<MachineSDNode>(Result);
  CurDAG->setNodeMemRefs(SelectedNode, {SourceMemOp});
}
4206
4207
/// Check whether the SELECT_CC node \p N, whose condition code is \p CC, has
/// one of the shapes that can be lowered to a SETB.
///
/// The shapes looked for are:
///   (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
///   (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
///   (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq)
///   (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq)
///
/// \param N            The node to inspect; must be an ISD::SELECT_CC.
/// \param CC           Condition code of \p N.
/// \param DAG          Not referenced by this function.
/// \param NeedSwapOps  Written on every path that returns true.
/// \param IsUnCmp      Set to true when an unsigned comparison is recognised.
///                     It is never cleared here, and it may already have been
///                     set when the function returns false.
/// \returns true if \p N matches one of the shapes above.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {
  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue OuterLHS = N->getOperand(0);
  SDValue OuterRHS = N->getOperand(1);
  SDValue FalseRes = N->getOperand(3);

  // The true value has to be a constant.
  auto *TrueConst = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!TrueConst)
    return false;

  assert((N->getSimpleValueType(0) == MVT::i64 ||
          N->getSimpleValueType(0) == MVT::i32) &&
         "Expecting either i64 or i32 here.");

  // The constant selects which wrapper is expected around the inner compare:
  // -1 pairs with a zext, 1 with a sext, and 0 with a nested select_cc (which
  // additionally requires the outer condition to be seteq).
  const int64_t TrueResVal = TrueConst->getSExtValue();
  const unsigned FalseOpc = FalseRes.getOpcode();
  switch (TrueResVal) {
  case -1:
    if (FalseOpc != ISD::ZERO_EXTEND)
      return false;
    break;
  case 1:
    if (FalseOpc != ISD::SIGN_EXTEND)
      return false;
    break;
  case 0:
    if (FalseOpc != ISD::SELECT_CC || CC != ISD::SETEQ)
      return false;
    break;
  default:
    return false;
  }

  const bool InnerIsSel = FalseOpc == ISD::SELECT_CC;
  SDValue InnerCmp = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
  const unsigned InnerOpc = InnerCmp.getOpcode();
  if (InnerOpc != ISD::SETCC && InnerOpc != ISD::SELECT_CC)
    return false;

  // Without this setb optimization, the outer SELECT_CC is manually selected
  // to the SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos
  // pass then turns into an isel instruction. When the inner result (or its
  // zext/sext) has more than one use, all we would achieve is replacing that
  // isel with a setb, without any significant gain. Since setb has a longer
  // latency than the original isel, avoid that. Another point is that setb
  // requires the comparison to be kept, which could take away a future
  // opportunity to eliminate the comparison.
  if (!InnerCmp.hasOneUse())
    return false;
  if (!InnerIsSel && !FalseRes.hasOneUse())
    return false;

  SDValue InnerLHS = InnerCmp.getOperand(0);
  SDValue InnerRHS = InnerCmp.getOperand(1);
  const unsigned InnerCCIdx = InnerIsSel ? 4 : 2;
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(InnerCmp.getOperand(InnerCCIdx))->get();

  // If the inner comparison is a select_cc, make sure the true/false values
  // are 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    auto *InnerTrueConst = dyn_cast<ConstantSDNode>(InnerCmp.getOperand(2));
    auto *InnerFalseConst = dyn_cast<ConstantSDNode>(InnerCmp.getOperand(3));
    if (!InnerTrueConst || !InnerFalseConst)
      return false;

    const int64_t InnerTrueVal = InnerTrueConst->getSExtValue();
    const int64_t InnerFalseVal = InnerFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (InnerTrueVal == -1 && InnerFalseVal == 1)
      std::swap(InnerLHS, InnerRHS);
    else if (!(InnerTrueVal == 1 && InnerFalseVal == -1))
      return false;
  }

  // Canonicalize the unsigned case to its signed counterpart, remembering
  // that the comparison was unsigned.
  if (InnerCC == ISD::SETULT) {
    IsUnCmp = true;
    InnerCC = ISD::SETLT;
  } else if (InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = ISD::SETGT;
  }

  // The inner comparison must use the same two operands as the outer one,
  // either in the same order or exchanged.
  const bool InnerSwapped = (OuterLHS == InnerRHS && OuterRHS == InnerLHS);
  const bool SameOrder = (OuterLHS == InnerLHS && OuterRHS == InnerRHS);
  if (!InnerSwapped && !SameOrder)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs,  0, \
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    // setgt: swap exactly when the inner operands are exchanged;
    // setlt: swap exactly when they are not.
    NeedSwapOps = ((InnerCC == ISD::SETGT) == InnerSwapped);
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
  case ISD::SETLT: {
    if (CC == ISD::SETULT) {
      if (!IsUnCmp && InnerCC != ISD::SETNE)
        return false;
      IsUnCmp = true;
    }
    const bool InnerMatches = InnerCC == ISD::SETNE ||
                              (InnerCC == ISD::SETGT && !InnerSwapped) ||
                              (InnerCC == ISD::SETLT && InnerSwapped);
    if (!InnerMatches)
      return false;
    NeedSwapOps = (TrueResVal == 1);
    break;
  }

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
  case ISD::SETGT: {
    if (CC == ISD::SETUGT) {
      if (!IsUnCmp && InnerCC != ISD::SETNE)
        return false;
      IsUnCmp = true;
    }
    const bool InnerMatches = InnerCC == ISD::SETNE ||
                              (InnerCC == ISD::SETLT && !InnerSwapped) ||
                              (InnerCC == ISD::SETGT && InnerSwapped);
    if (!InnerMatches)
      return false;
    NeedSwapOps = (TrueResVal == -1);
    break;
  }

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}
4346
4347
// Select - Convert the specified operand from a target-independent to a
4348
// target-specific node if it hasn't already been changed.
4349
199k
void PPCDAGToDAGISel::Select(SDNode *N) {
4350
199k
  SDLoc dl(N);
4351
199k
  if (N->isMachineOpcode()) {
4352
69
    N->setNodeId(-1);
4353
69
    return;   // Already selected.
4354
69
  }
4355
199k
4356
199k
  // In case any misguided DAG-level optimizations form an ADD with a
4357
199k
  // TargetConstant operand, crash here instead of miscompiling (by selecting
4358
199k
  // an r+r add instead of some kind of r+i add).
4359
199k
  if (N->getOpcode() == ISD::ADD &&
4360
199k
      
N->getOperand(1).getOpcode() == ISD::TargetConstant2.58k
)
4361
199k
    
llvm_unreachable0
("Invalid ADD with TargetConstant operand");
4362
199k
4363
199k
  // Try matching complex bit permutations before doing anything else.
4364
199k
  if (tryBitPermutation(N))
4365
957
    return;
4366
198k
4367
198k
  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4368
198k
  if (tryIntCompareInGPR(N))
4369
1.09k
    return;
4370
197k
4371
197k
  switch (N->getOpcode()) {
4372
197k
  
default: break166k
;
4373
197k
4374
197k
  case ISD::Constant:
4375
3.19k
    if (N->getValueType(0) == MVT::i64) {
4376
2.18k
      ReplaceNode(N, selectI64Imm(CurDAG, N));
4377
2.18k
      return;
4378
2.18k
    }
4379
1.00k
    break;
4380
1.00k
4381
1.27k
  case ISD::SETCC:
4382
1.27k
    if (trySETCC(N))
4383
411
      return;
4384
859
    break;
4385
859
  // These nodes will be transformed into GETtlsADDR32 node, which
4386
859
  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
4387
859
  case PPCISD::ADDI_TLSLD_L_ADDR:
4388
30
  case PPCISD::ADDI_TLSGD_L_ADDR: {
4389
30
    const Module *Mod = MF->getFunction().getParent();
4390
30
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4391
30
        
!PPCSubTarget->isSecurePlt()10
||
!PPCSubTarget->isTargetELF()2
||
4392
30
        
Mod->getPICLevel() == PICLevel::SmallPIC2
)
4393
29
      break;
4394
1
    // Attach global base pointer on GETtlsADDR32 node in order to
4395
1
    // generate secure plt code for TLS symbols.
4396
1
    getGlobalBaseReg();
4397
1
  } break;
4398
467
  case PPCISD::CALL: {
4399
467
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4400
467
        
!TM.isPositionIndependent()382
||
!PPCSubTarget->isSecurePlt()24
||
4401
467
        
!PPCSubTarget->isTargetELF()8
)
4402
459
      break;
4403
8
4404
8
    SDValue Op = N->getOperand(1);
4405
8
4406
8
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4407
8
      if (GA->getTargetFlags() == PPCII::MO_PLT)
4408
8
        getGlobalBaseReg();
4409
8
    }
4410
0
    else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4411
0
      if (ES->getTargetFlags() == PPCII::MO_PLT)
4412
0
        getGlobalBaseReg();
4413
0
    }
4414
8
  }
4415
8
    break;
4416
8
4417
34
  case PPCISD::GlobalBaseReg:
4418
34
    ReplaceNode(N, getGlobalBaseReg());
4419
34
    return;
4420
8
4421
880
  case ISD::FrameIndex:
4422
880
    selectFrameIndex(N, N);
4423
880
    return;
4424
8
4425
8
  case PPCISD::MFOCRF: {
4426
7
    SDValue InFlag = N->getOperand(1);
4427
7
    ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4428
7
                                          N->getOperand(0), InFlag));
4429
7
    return;
4430
8
  }
4431
8
4432
8
  case PPCISD::READ_TIME_BASE:
4433
2
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4434
2
                                          MVT::Other, N->getOperand(0)));
4435
2
    return;
4436
8
4437
13
  case PPCISD::SRA_ADDZE: {
4438
13
    SDValue N0 = N->getOperand(0);
4439
13
    SDValue ShiftAmt =
4440
13
      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4441
13
                                  getConstantIntValue(), dl,
4442
13
                                  N->getValueType(0));
4443
13
    if (N->getValueType(0) == MVT::i64) {
4444
2
      SDNode *Op =
4445
2
        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4446
2
                               N0, ShiftAmt);
4447
2
      CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4448
2
                           SDValue(Op, 1));
4449
2
      return;
4450
11
    } else {
4451
11
      assert(N->getValueType(0) == MVT::i32 &&
4452
11
             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4453
11
      SDNode *Op =
4454
11
        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4455
11
                               N0, ShiftAmt);
4456
11
      CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4457
11
                           SDValue(Op, 1));
4458
11
      return;
4459
11
    }
4460
0
  }
4461
0
4462
6.50k
  case ISD::STORE: {
4463
6.50k
    // Change TLS initial-exec D-form stores to X-form stores.
4464
6.50k
    StoreSDNode *ST = cast<StoreSDNode>(N);
4465
6.50k
    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4466
6.50k
        
ST->getAddressingMode() != ISD::PRE_INC1.78k
)
4467
1.76k
      if (tryTLSXFormStore(ST))
4468
9
        return;
4469
6.49k
    break;
4470
6.49k
  }
4471
7.42k
  case ISD::LOAD: {
4472
7.42k
    // Handle preincrement loads.
4473
7.42k
    LoadSDNode *LD = cast<LoadSDNode>(N);
4474
7.42k
    EVT LoadedVT = LD->getMemoryVT();
4475
7.42k
4476
7.42k
    // Normal loads are handled by code generated from the .td file.
4477
7.42k
    if (LD->getAddressingMode() != ISD::PRE_INC) {
4478
7.17k
      // Change TLS initial-exec D-form loads to X-form loads.
4479
7.17k
      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4480
2.85k
        if (tryTLSXFormLoad(LD))
4481
9
          return;
4482
7.16k
      break;
4483
7.16k
    }
4484
248
4485
248
    SDValue Offset = LD->getOffset();
4486
248
    if (Offset.getOpcode() == ISD::TargetConstant ||
4487
248
        
Offset.getOpcode() == ISD::TargetGlobalAddress81
) {
4488
195
4489
195
      unsigned Opcode;
4490
195
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4491
195
      if (LD->getValueType(0) != MVT::i64) {
4492
182
        // Handle PPC32 integer and normal FP loads.
4493
182
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4494
182
        switch (LoadedVT.getSimpleVT().SimpleTy) {
4495
182
          
default: 0
llvm_unreachable0
("Invalid PPC load type!");
4496
182
          
case MVT::f64: Opcode = PPC::LFDU; break12
;
4497
182
          
case MVT::f32: Opcode = PPC::LFSU; break11
;
4498
182
          
case MVT::i32: Opcode = PPC::LWZU; break101
;
4499
182
          
case MVT::i16: Opcode = isSExt 6
?
PPC::LHAU3
:
PPC::LHZU3
; break;
4500
182
          case MVT::i1:
4501
52
          case MVT::i8:  Opcode = PPC::LBZU; break;
4502
13
        }
4503
13
      } else {
4504
13
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4505
13
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4506
13
        switch (LoadedVT.getSimpleVT().SimpleTy) {
4507
13
          
default: 0
llvm_unreachable0
("Invalid PPC load type!");
4508
13
          
case MVT::i64: Opcode = PPC::LDU; break12
;
4509
13
          
case MVT::i32: Opcode = PPC::LWZU8; break0
;
4510
13
          
case MVT::i16: Opcode = isSExt 1
?
PPC::LHAU81
:
PPC::LHZU80
; break;
4511
13
          case MVT::i1:
4512
0
          case MVT::i8:  Opcode = PPC::LBZU8; break;
4513
195
        }
4514
195
      }
4515
195
4516
195
      SDValue Chain = LD->getChain();
4517
195
      SDValue Base = LD->getBasePtr();
4518
195
      SDValue Ops[] = { Offset, Base, Chain };
4519
195
      SDNode *MN = CurDAG->getMachineNode(
4520
195
          Opcode, dl, LD->getValueType(0),
4521
195
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4522
195
      transferMemOperands(N, MN);
4523
195
      ReplaceNode(N, MN);
4524
195
      return;
4525
195
    } else {
4526
53
      unsigned Opcode;
4527
53
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4528
53
      if (LD->getValueType(0) != MVT::i64) {
4529
53
        // Handle PPC32 integer and normal FP loads.
4530
53
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4531
53
        switch (LoadedVT.getSimpleVT().SimpleTy) {
4532
53
          
default: 0
llvm_unreachable0
("Invalid PPC load type!");
4533
53
          
case MVT::v4f64: Opcode = PPC::QVLFDUX; break1
; // QPX
4534
53
          
case MVT::v4f32: Opcode = PPC::QVLFSUX; break0
; // QPX
4535
53
          
case MVT::f64: Opcode = PPC::LFDUX; break18
;
4536
53
          
case MVT::f32: Opcode = PPC::LFSUX; break33
;
4537
53
          
case MVT::i32: Opcode = PPC::LWZUX; break0
;
4538
53
          
case MVT::i16: Opcode = isSExt 0
?
PPC::LHAUX0
:
PPC::LHZUX0
; break;
4539
53
          case MVT::i1:
4540
1
          case MVT::i8:  Opcode = PPC::LBZUX; break;
4541
0
        }
4542
0
      } else {
4543
0
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4544
0
        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4545
0
               "Invalid sext update load");
4546
0
        switch (LoadedVT.getSimpleVT().SimpleTy) {
4547
0
          default: llvm_unreachable("Invalid PPC load type!");
4548
0
          case MVT::i64: Opcode = PPC::LDUX; break;
4549
0
          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
4550
0
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4551
0
          case MVT::i1:
4552
0
          case MVT::i8:  Opcode = PPC::LBZUX8; break;
4553
53
        }
4554
53
      }
4555
53
4556
53
      SDValue Chain = LD->getChain();
4557
53
      SDValue Base = LD->getBasePtr();
4558
53
      SDValue Ops[] = { Base, Offset, Chain };
4559
53
      SDNode *MN = CurDAG->getMachineNode(
4560
53
          Opcode, dl, LD->getValueType(0),
4561
53
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4562
53
      transferMemOperands(N, MN);
4563
53
      ReplaceNode(N, MN);
4564
53
      return;
4565
53
    }
4566
0
  }
4567
0
4568
838
  case ISD::AND: {
4569
838
    unsigned Imm, Imm2, SH, MB, ME;
4570
838
    uint64_t Imm64;
4571
838
4572
838
    // If this is an and of a value rotated between 0 and 31 bits and then and'd
4573
838
    // with a mask, emit rlwinm
4574
838
    if (isInt32Immediate(N->getOperand(1), Imm) &&
4575
838
        
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)438
) {
4576
0
      SDValue Val = N->getOperand(0).getOperand(0);
4577
0
      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4578
0
                        getI32Imm(ME, dl) };
4579
0
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4580
0
      return;
4581
0
    }
4582
838
    // If this is just a masked value where the input is not handled above, and
4583
838
    // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4584
838
    if (isInt32Immediate(N->getOperand(1), Imm) &&
4585
838
        
isRunOfOnes(Imm, MB, ME)438
&&
4586
838
        
N->getOperand(0).getOpcode() != ISD::ROTL414
) {
4587
412
      SDValue Val = N->getOperand(0);
4588
412
      SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4589
412
                        getI32Imm(ME, dl) };
4590
412
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4591
412
      return;
4592
412
    }
4593
426
    // If this is a 64-bit zero-extension mask, emit rldicl.
4594
426
    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4595
426
        
isMask_64(Imm64)201
) {
4596
152
      SDValue Val = N->getOperand(0);
4597
152
      MB = 64 - countTrailingOnes(Imm64);
4598
152
      SH = 0;
4599
152
4600
152
      if (Val.getOpcode() == ISD::ANY_EXTEND) {
4601
73
        auto Op0 = Val.getOperand(0);
4602
73
        if ( Op0.getOpcode() == ISD::SRL &&
4603
73
           
isInt32Immediate(Op0.getOperand(1).getNode(), Imm)1
&&
Imm <= MB1
) {
4604
1
4605
1
           auto ResultType = Val.getNode()->getValueType(0);
4606
1
           auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4607
1
                                               ResultType);
4608
1
           SDValue IDVal (ImDef, 0);
4609
1
4610
1
           Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4611
1
                         ResultType, IDVal, Op0.getOperand(0),
4612
1
                         getI32Imm(1, dl)), 0);
4613
1
           SH = 64 - Imm;
4614
1
        }
4615
73
      }
4616
152
4617
152
      // If the operand is a logical right shift, we can fold it into this
4618
152
      // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4619
152
      // for n <= mb. The right shift is really a left rotate followed by a
4620
152
      // mask, and this mask is a more-restrictive sub-mask of the mask implied
4621
152
      // by the shift.
4622
152
      if (Val.getOpcode() == ISD::SRL &&
4623
152
          
isInt32Immediate(Val.getOperand(1).getNode(), Imm)0
&&
Imm <= MB0
) {
4624
0
        assert(Imm < 64 && "Illegal shift amount");
4625
0
        Val = Val.getOperand(0);
4626
0
        SH = 64 - Imm;
4627
0
      }
4628
152
4629
152
      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4630
152
      CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4631
152
      return;
4632
152
    }
4633
274
    // If this is a negated 64-bit zero-extension mask,
4634
274
    // i.e. the immediate is a sequence of ones from most significant side
4635
274
    // and all zeros for the remainder, we should use rldicr.
4636
274
    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4637
274
        
isMask_64(~Imm64)49
) {
4638
22
      SDValue Val = N->getOperand(0);
4639
22
      MB = 63 - countTrailingOnes(~Imm64);
4640
22
      SH = 0;
4641
22
      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4642
22
      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4643
22
      return;
4644
22
    }
4645
252
4646
252
    // AND X, 0 -> 0, not "rlwinm 32".
4647
252
    if (isInt32Immediate(N->getOperand(1), Imm) && 
(Imm == 0)26
) {
4648
0
      ReplaceUses(SDValue(N, 0), N->getOperand(1));
4649
0
      return;
4650
0
    }
4651
252
    // ISD::OR doesn't get all the bitfield insertion fun.
4652
252
    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4653
252
    // bitfield insert.
4654
252
    if (isInt32Immediate(N->getOperand(1), Imm) &&
4655
252
        
N->getOperand(0).getOpcode() == ISD::OR26
&&
4656
252
        
isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)2
) {
4657
2
      // The idea here is to check whether this is equivalent to:
4658
2
      //   (c1 & m) | (x & ~m)
4659
2
      // where m is a run-of-ones mask. The logic here is that, for each bit in
4660
2
      // c1 and c2:
4661
2
      //  - if both are 1, then the output will be 1.
4662
2
      //  - if both are 0, then the output will be 0.
4663
2
      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4664
2
      //    come from x.
4665
2
      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4666
2
      //    be 0.
4667
2
      //  If that last condition is never the case, then we can form m from the
4668
2
      //  bits that are the same between c1 and c2.
4669
2
      unsigned MB, ME;
4670
2
      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && 
!(~Imm & Imm2)1
) {
4671
0
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
4672
0
                            N->getOperand(0).getOperand(1),
4673
0
                            getI32Imm(0, dl), getI32Imm(MB, dl),
4674
0
                            getI32Imm(ME, dl) };
4675
0
        ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4676
0
        return;
4677
0
      }
4678
252
    }
4679
252
4680
252
    // Other cases are autogenerated.
4681
252
    break;
4682
252
  }
4683
356
  case ISD::OR: {
4684
356
    if (N->getValueType(0) == MVT::i32)
4685
122
      if (tryBitfieldInsert(N))
4686
5
        return;
4687
351
4688
351
    int16_t Imm;
4689
351
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4690
351
        
isIntS16Immediate(N->getOperand(1), Imm)9
) {
4691
9
      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
4692
9
4693
9
      // If this is equivalent to an add, then we can fold it with the
4694
9
      // FrameIndex calculation.
4695
9
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4696
9
        selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4697
9
        return;
4698
9
      }
4699
342
    }
4700
342
4701
342
    // OR with a 32-bit immediate can be handled by ori + oris
4702
342
    // without creating an immediate in a GPR.
4703
342
    uint64_t Imm64 = 0;
4704
342
    bool IsPPC64 = PPCSubTarget->isPPC64();
4705
342
    if (IsPPC64 && 
isInt64Immediate(N->getOperand(1), Imm64)225
&&
4706
342
        
(Imm64 & ~0xFFFFFFFFuLL) == 026
) {
4707
15
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4708
15
      uint64_t ImmHi = Imm64 >> 16;
4709
15
      uint64_t ImmLo = Imm64 & 0xFFFF;
4710
15
      if (ImmHi != 0 && 
ImmLo != 07
) {
4711
5
        SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4712
5
                                            N->getOperand(0),
4713
5
                                            getI16Imm(ImmLo, dl));
4714
5
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4715
5
        CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4716
5
        return;
4717
5
      }
4718
337
    }
4719
337
4720
337
    // Other cases are autogenerated.
4721
337
    break;
4722
337
  }
4723
348
  case ISD::XOR: {
4724
348
    // XOR with a 32-bit immediate can be handled by xori + xoris
4725
348
    // without creating an immediate in a GPR.
4726
348
    uint64_t Imm64 = 0;
4727
348
    bool IsPPC64 = PPCSubTarget->isPPC64();
4728
348
    if (IsPPC64 && 
isInt64Immediate(N->getOperand(1), Imm64)269
&&
4729
348
        
(Imm64 & ~0xFFFFFFFFuLL) == 0110
) {
4730
13
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4731
13
      uint64_t ImmHi = Imm64 >> 16;
4732
13
      uint64_t ImmLo = Imm64 & 0xFFFF;
4733
13
      if (ImmHi != 0 && 
ImmLo != 06
) {
4734
4
        SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4735
4
                                            N->getOperand(0),
4736
4
                                            getI16Imm(ImmLo, dl));
4737
4
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4738
4
        CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4739
4
        return;
4740
4
      }
4741
344
    }
4742
344
4743
344
    break;
4744
344
  }
4745
2.58k
  case ISD::ADD: {
4746
2.58k
    int16_t Imm;
4747
2.58k
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4748
2.58k
        
isIntS16Immediate(N->getOperand(1), Imm)49
) {
4749
47
      selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4750
47
      return;
4751
47
    }
4752
2.53k
4753
2.53k
    break;
4754
2.53k
  }
4755
2.53k
  case ISD::SHL: {
4756
189
    unsigned Imm, SH, MB, ME;
4757
189
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4758
189
        
isRotateAndMask(N, Imm, true, SH, MB, ME)0
) {
4759
0
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
4760
0
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
4761
0
                          getI32Imm(ME, dl) };
4762
0
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4763
0
      return;
4764
0
    }
4765
189
4766
189
    // Other cases are autogenerated.
4767
189
    break;
4768
189
  }
4769
189
  case ISD::SRL: {
4770
36
    unsigned Imm, SH, MB, ME;
4771
36
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4772
36
        
isRotateAndMask(N, Imm, true, SH, MB, ME)4
) {
4773
0
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
4774
0
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
4775
0
                          getI32Imm(ME, dl) };
4776
0
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4777
0
      return;
4778
0
    }
4779
36
4780
36
    // Other cases are autogenerated.
4781
36
    break;
4782
36
  }
4783
36
  // FIXME: Remove this once the ANDI glue bug is fixed:
4784
36
  case PPCISD::ANDIo_1_EQ_BIT:
4785
0
  case PPCISD::ANDIo_1_GT_BIT: {
4786
0
    if (!ANDIGlueBug)
4787
0
      break;
4788
0
4789
0
    EVT InVT = N->getOperand(0).getValueType();
4790
0
    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4791
0
           "Invalid input type for ANDIo_1_EQ_BIT");
4792
0
4793
0
    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4794
0
    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4795
0
                                        N->getOperand(0),
4796
0
                                        CurDAG->getTargetConstant(1, dl, InVT)),
4797
0
                 0);
4798
0
    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4799
0
    SDValue SRIdxVal =
4800
0
      CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4801
0
                                PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4802
0
4803
0
    CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4804
0
                         SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4805
0
    return;
4806
0
  }
4807
533
  case ISD::SELECT_CC: {
4808
533
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4809
533
    EVT PtrVT =
4810
533
        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4811
533
    bool isPPC64 = (PtrVT == MVT::i64);
4812
533
4813
533
    // If this is a select of i1 operands, we'll pattern match it.
4814
533
    if (PPCSubTarget->useCRBits() &&
4815
533
        
N->getOperand(0).getValueType() == MVT::i1490
)
4816
104
      break;
4817
429
4818
429
    if (PPCSubTarget->isISA3_0() && 
PPCSubTarget->isPPC64()62
) {
4819
62
      bool NeedSwapOps = false;
4820
62
      bool IsUnCmp = false;
4821
62
      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
4822
49
        SDValue LHS = N->getOperand(0);
4823
49
        SDValue RHS = N->getOperand(1);
4824
49
        if (NeedSwapOps)
4825
24
          std::swap(LHS, RHS);
4826
49
4827
49
        // Use SelectCC to generate the comparison that sets the CR bits. For
4828
49
        // equality comparisons with one literal operand, SelectCC may avoid
4829
49
        // materializing the whole literal and use xoris to check it first; the
4830
49
        // resulting comparison then cannot exactly represent the GT/LT
4831
49
        // relationship. To avoid this we specify SETGT/SETUGT here instead of
4832
49
        // SETEQ.
4833
49
        SDValue GenCC =
4834
49
            SelectCC(LHS, RHS, IsUnCmp ? 
ISD::SETUGT7
:
ISD::SETGT42
, dl);
4835
49
        CurDAG->SelectNodeTo(
4836
49
            N, N->getSimpleValueType(0) == MVT::i64 ? 
PPC::SETB846
:
PPC::SETB3
,
4837
49
            N->getValueType(0), GenCC);
4838
49
        NumP9Setb++;
4839
49
        return;
4840
49
      }
4841
380
    }
4842
380
4843
380
    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
4844
380
    if (!isPPC64)
4845
80
      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4846
39
        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4847
8
          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4848
6
            if (N1C->isNullValue() && 
N3C->isNullValue()4
&&
4849
6
                
N2C->getZExtValue() == 1ULL4
&&
CC == ISD::SETNE4
&&
4850
6
                // FIXME: Implement this optzn for PPC64.
4851
6
                
N->getValueType(0) == MVT::i323
) {
4852
3
              SDNode *Tmp =
4853
3
                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4854
3
                                       N->getOperand(0), getI32Imm(~0U, dl));
4855
3
              CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4856
3
                                   N->getOperand(0), SDValue(Tmp, 1));
4857
3
              return;
4858
3
            }
4859
377
4860
377
    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4861
377
4862
377
    if (N->getValueType(0) == MVT::i1) {
4863
6
      // An i1 select is: (c & t) | (!c & f).
4864
6
      bool Inv;
4865
6
      unsigned Idx = getCRIdxForSetCC(CC, Inv);
4866
6
4867
6
      unsigned SRI;
4868
6
      switch (Idx) {
4869
6
      
default: 0
llvm_unreachable0
("Invalid CC index");
4870
6
      
case 0: SRI = PPC::sub_lt; break0
;
4871
6
      
case 1: SRI = PPC::sub_gt; break0
;
4872
6
      case 2: SRI = PPC::sub_eq; break;
4873
6
      
case 3: SRI = PPC::sub_un; break0
;
4874
6
      }
4875
6
4876
6
      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4877
6
4878
6
      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4879
6
                                              CCBit, CCBit), 0);
4880
6
      SDValue C =    Inv ? 
NotCCBit0
: CCBit,
4881
6
              NotC = Inv ? 
CCBit0
: NotCCBit;
4882
6
4883
6
      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4884
6
                                           C, N->getOperand(2)), 0);
4885
6
      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4886
6
                                              NotC, N->getOperand(3)), 0);
4887
6
4888
6
      CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4889
6
      return;
4890
6
    }
4891
371
4892
371
    unsigned BROpc = getPredicateForSetCC(CC);
4893
371
4894
371
    unsigned SelectCCOp;
4895
371
    if (N->getValueType(0) == MVT::i32)
4896
144
      SelectCCOp = PPC::SELECT_CC_I4;
4897
227
    else if (N->getValueType(0) == MVT::i64)
4898
150
      SelectCCOp = PPC::SELECT_CC_I8;
4899
77
    else if (N->getValueType(0) == MVT::f32) {
4900
36
      if (PPCSubTarget->hasP8Vector())
4901
0
        SelectCCOp = PPC::SELECT_CC_VSSRC;
4902
36
      else if (PPCSubTarget->hasSPE())
4903
0
        SelectCCOp = PPC::SELECT_CC_SPE4;
4904
36
      else
4905
36
        SelectCCOp = PPC::SELECT_CC_F4;
4906
41
    } else if (N->getValueType(0) == MVT::f64) {
4907
38
      if (PPCSubTarget->hasVSX())
4908
19
        SelectCCOp = PPC::SELECT_CC_VSFRC;
4909
19
      else if (PPCSubTarget->hasSPE())
4910
0
        SelectCCOp = PPC::SELECT_CC_SPE;
4911
19
      else
4912
19
        SelectCCOp = PPC::SELECT_CC_F8;
4913
38
    } else 
if (3
N->getValueType(0) == MVT::f1283
)
4914
3
      SelectCCOp = PPC::SELECT_CC_F16;
4915
0
    else if (PPCSubTarget->hasSPE())
4916
0
      SelectCCOp = PPC::SELECT_CC_SPE;
4917
0
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4918
0
      SelectCCOp = PPC::SELECT_CC_QFRC;
4919
0
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4920
0
      SelectCCOp = PPC::SELECT_CC_QSRC;
4921
0
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4922
0
      SelectCCOp = PPC::SELECT_CC_QBRC;
4923
0
    else if (N->getValueType(0) == MVT::v2f64 ||
4924
0
             N->getValueType(0) == MVT::v2i64)
4925
0
      SelectCCOp = PPC::SELECT_CC_VSRC;
4926
0
    else
4927
0
      SelectCCOp = PPC::SELECT_CC_VRRC;
4928
371
4929
371
    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4930
371
                        getI32Imm(BROpc, dl) };