Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This implements the TargetLowering class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/CodeGen/TargetLowering.h"
14
#include "llvm/ADT/BitVector.h"
15
#include "llvm/ADT/STLExtras.h"
16
#include "llvm/CodeGen/CallingConvLower.h"
17
#include "llvm/CodeGen/MachineFrameInfo.h"
18
#include "llvm/CodeGen/MachineFunction.h"
19
#include "llvm/CodeGen/MachineJumpTableInfo.h"
20
#include "llvm/CodeGen/MachineRegisterInfo.h"
21
#include "llvm/CodeGen/SelectionDAG.h"
22
#include "llvm/CodeGen/TargetRegisterInfo.h"
23
#include "llvm/CodeGen/TargetSubtargetInfo.h"
24
#include "llvm/IR/DataLayout.h"
25
#include "llvm/IR/DerivedTypes.h"
26
#include "llvm/IR/GlobalVariable.h"
27
#include "llvm/IR/LLVMContext.h"
28
#include "llvm/MC/MCAsmInfo.h"
29
#include "llvm/MC/MCExpr.h"
30
#include "llvm/Support/ErrorHandling.h"
31
#include "llvm/Support/KnownBits.h"
32
#include "llvm/Support/MathExtras.h"
33
#include "llvm/Target/TargetLoweringObjectFile.h"
34
#include "llvm/Target/TargetMachine.h"
35
#include <cctype>
36
using namespace llvm;
37
38
/// NOTE: The TargetMachine owns TLOF.
39
TargetLowering::TargetLowering(const TargetMachine &tm)
40
53.2k
  : TargetLoweringBase(tm) {}
41
42
0
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
43
0
  return nullptr;
44
0
}
45
46
171k
bool TargetLowering::isPositionIndependent() const {
47
171k
  return getTargetMachine().isPositionIndependent();
48
171k
}
49
50
/// Check whether a given call node is in tail position within its function. If
51
/// so, it sets Chain to the input chain of the tail call.
52
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
53
5.65k
                                          SDValue &Chain) const {
54
5.65k
  const Function &F = DAG.getMachineFunction().getFunction();
55
5.65k
56
5.65k
  // Conservatively require the attributes of the call to match those of
57
5.65k
  // the return. Ignore NoAlias and NonNull because they don't affect the
58
5.65k
  // call sequence.
59
5.65k
  AttributeList CallerAttrs = F.getAttributes();
60
5.65k
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
61
5.65k
          .removeAttribute(Attribute::NoAlias)
62
5.65k
          .removeAttribute(Attribute::NonNull)
63
5.65k
          .hasAttributes())
64
58
    return false;
65
5.59k
66
5.59k
  // It's not safe to eliminate the sign / zero extension of the return value.
67
5.59k
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
68
5.59k
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
69
0
    return false;
70
5.59k
71
5.59k
  // Check if the only use is a function return node.
72
5.59k
  return isUsedByReturnOnly(Node, Chain);
73
5.59k
}
74
75
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
76
    const uint32_t *CallerPreservedMask,
77
    const SmallVectorImpl<CCValAssign> &ArgLocs,
78
62.3k
    const SmallVectorImpl<SDValue> &OutVals) const {
79
269k
  for (unsigned I = 0, E = ArgLocs.size(); I != E; 
++I206k
) {
80
206k
    const CCValAssign &ArgLoc = ArgLocs[I];
81
206k
    if (!ArgLoc.isRegLoc())
82
1.10k
      continue;
83
205k
    unsigned Reg = ArgLoc.getLocReg();
84
205k
    // Only look at callee saved registers.
85
205k
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
86
205k
      continue;
87
16
    // Check that we pass the value used for the caller.
88
16
    // (We look for a CopyFromReg reading a virtual register that is used
89
16
    //  for the function live-in value of register Reg)
90
16
    SDValue Value = OutVals[I];
91
16
    if (Value->getOpcode() != ISD::CopyFromReg)
92
0
      return false;
93
16
    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
94
16
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
95
7
      return false;
96
16
  }
97
62.3k
  
return true62.3k
;
98
62.3k
}
99
100
/// Set CallLoweringInfo attribute flags based on a call instruction
101
/// and called function attributes.
102
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
103
1.00M
                                                     unsigned ArgIdx) {
104
1.00M
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
105
1.00M
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
106
1.00M
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
107
1.00M
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
108
1.00M
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
109
1.00M
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
110
1.00M
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
111
1.00M
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
112
1.00M
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
113
1.00M
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
114
1.00M
  Alignment = Call->getParamAlignment(ArgIdx);
115
1.00M
  ByValType = nullptr;
116
1.00M
  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
117
1.14k
    ByValType = Call->getParamByValType(ArgIdx);
118
1.00M
}
119
120
/// Generate a libcall taking the given operands as arguments and returning a
121
/// result of type RetVT.
122
std::pair<SDValue, SDValue>
123
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
124
                            ArrayRef<SDValue> Ops, bool isSigned,
125
                            const SDLoc &dl, bool doesNotReturn,
126
                            bool isReturnValueUsed,
127
5.99k
                            bool isPostTypeLegalization) const {
128
5.99k
  TargetLowering::ArgListTy Args;
129
5.99k
  Args.reserve(Ops.size());
130
5.99k
131
5.99k
  TargetLowering::ArgListEntry Entry;
132
9.40k
  for (SDValue Op : Ops) {
133
9.40k
    Entry.Node = Op;
134
9.40k
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
135
9.40k
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
136
9.40k
    Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
137
9.40k
    Args.push_back(Entry);
138
9.40k
  }
139
5.99k
140
5.99k
  if (LC == RTLIB::UNKNOWN_LIBCALL)
141
0
    report_fatal_error("Unsupported library call operation!");
142
5.99k
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
143
5.99k
                                         getPointerTy(DAG.getDataLayout()));
144
5.99k
145
5.99k
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
146
5.99k
  TargetLowering::CallLoweringInfo CLI(DAG);
147
5.99k
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
148
5.99k
  CLI.setDebugLoc(dl)
149
5.99k
      .setChain(DAG.getEntryNode())
150
5.99k
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
151
5.99k
      .setNoReturn(doesNotReturn)
152
5.99k
      .setDiscardResult(!isReturnValueUsed)
153
5.99k
      .setIsPostTypeLegalization(isPostTypeLegalization)
154
5.99k
      .setSExtResult(signExtend)
155
5.99k
      .setZExtResult(!signExtend);
156
5.99k
  return LowerCallTo(CLI);
157
5.99k
}
158
159
bool
160
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
161
                                         unsigned Limit, uint64_t Size,
162
                                         unsigned DstAlign, unsigned SrcAlign,
163
                                         bool IsMemset,
164
                                         bool ZeroMemset,
165
                                         bool MemcpyStrSrc,
166
                                         bool AllowOverlap,
167
                                         unsigned DstAS, unsigned SrcAS,
168
12.7k
                                         const AttributeList &FuncAttributes) const {
169
12.7k
  // If 'SrcAlign' is zero, that means the memory operation does not need to
170
12.7k
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
171
12.7k
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
172
12.7k
  // is the specified alignment of the memory operation. If it is zero, that
173
12.7k
  // means it's possible to change the alignment of the destination.
174
12.7k
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
175
12.7k
  // not need to be loaded.
176
12.7k
  if (!(SrcAlign == 0 || 
SrcAlign >= DstAlign8.18k
))
177
2
    return false;
178
12.7k
179
12.7k
  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
180
12.7k
                               IsMemset, ZeroMemset, MemcpyStrSrc,
181
12.7k
                               FuncAttributes);
182
12.7k
183
12.7k
  if (VT == MVT::Other) {
184
669
    // Use the largest integer type whose alignment constraints are satisfied.
185
669
    // We only need to check DstAlign here as SrcAlign is always greater or
186
669
    // equal to DstAlign (or zero).
187
669
    VT = MVT::i64;
188
1.18k
    while (DstAlign && 
DstAlign < VT.getSizeInBits() / 81.12k
&&
189
1.18k
           
!allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)902
)
190
514
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
191
669
    assert(VT.isInteger());
192
669
193
669
    // Find the largest legal integer type.
194
669
    MVT LVT = MVT::i64;
195
1.15k
    while (!isTypeLegal(LVT))
196
489
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
197
669
    assert(LVT.isInteger());
198
669
199
669
    // If the type we've chosen is larger than the largest legal integer type
200
669
    // then use that instead.
201
669
    if (VT.bitsGT(LVT))
202
78
      VT = LVT;
203
669
  }
204
12.7k
205
12.7k
  unsigned NumMemOps = 0;
206
101k
  while (Size != 0) {
207
92.1k
    unsigned VTSize = VT.getSizeInBits() / 8;
208
97.8k
    while (VTSize > Size) {
209
6.09k
      // For now, only use non-vector load / store's for the left-over pieces.
210
6.09k
      EVT NewVT = VT;
211
6.09k
      unsigned NewVTSize;
212
6.09k
213
6.09k
      bool Found = false;
214
6.09k
      if (VT.isVector() || 
VT.isFloatingPoint()4.98k
) {
215
3.31k
        NewVT = (VT.getSizeInBits() > 64) ? 
MVT::i642.54k
:
MVT::i32765
;
216
3.31k
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
217
3.31k
            
isSafeMemOpType(NewVT.getSimpleVT())3.20k
)
218
3.20k
          Found = true;
219
107
        else if (NewVT == MVT::i64 &&
220
107
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
221
107
                 
isSafeMemOpType(MVT::f64)49
) {
222
48
          // i64 is usually not legal on 32-bit targets, but f64 may be.
223
48
          NewVT = MVT::f64;
224
48
          Found = true;
225
48
        }
226
3.31k
      }
227
6.09k
228
6.09k
      if (!Found) {
229
2.83k
        do {
230
2.83k
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
231
2.83k
          if (NewVT == MVT::i8)
232
401
            break;
233
2.43k
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
234
2.83k
      }
235
6.09k
      NewVTSize = NewVT.getSizeInBits() / 8;
236
5.69k
237
5.69k
      // If the new VT cannot cover all of the remaining bits, then consider
238
5.69k
      // issuing a (or a pair of) unaligned and overlapping load / store.
239
5.69k
      bool Fast;
240
5.69k
      if (NumMemOps && 
AllowOverlap4.93k
&&
NewVTSize < Size4.69k
&&
241
5.69k
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
242
844
                                         MachineMemOperand::MONone, &Fast) &&
243
5.69k
          
Fast833
)
244
825
        VTSize = Size;
245
4.86k
      else {
246
4.86k
        VT = NewVT;
247
4.86k
        VTSize = NewVTSize;
248
4.86k
      }
249
5.69k
    }
250
92.1k
251
92.1k
    
if (91.7k
++NumMemOps > Limit91.7k
)
252
2.65k
      return false;
253
89.1k
254
89.1k
    MemOps.push_back(VT);
255
89.1k
    Size -= VTSize;
256
89.1k
  }
257
12.7k
258
12.7k
  
return true9.70k
;
259
12.7k
}
260
261
/// Soften the operands of a comparison. This code is shared among BR_CC,
262
/// SELECT_CC, and SETCC handlers.
263
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
264
                                         SDValue &NewLHS, SDValue &NewRHS,
265
                                         ISD::CondCode &CCCode,
266
1.10k
                                         const SDLoc &dl) const {
267
1.10k
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
268
1.10k
         && "Unsupported setcc type!");
269
1.10k
270
1.10k
  // Expand into one or more soft-fp libcall(s).
271
1.10k
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
272
1.10k
  bool ShouldInvertCC = false;
273
1.10k
  switch (CCCode) {
274
1.10k
  case ISD::SETEQ:
275
293
  case ISD::SETOEQ:
276
293
    LC1 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32124
:
277
293
          
(VT == MVT::f64) 169
?
RTLIB::OEQ_F64157
:
278
169
          
(VT == MVT::f128) 12
?
RTLIB::OEQ_F12811
:
RTLIB::OEQ_PPCF1281
;
279
293
    break;
280
293
  case ISD::SETNE:
281
142
  case ISD::SETUNE:
282
142
    LC1 = (VT == MVT::f32) ? 
RTLIB::UNE_F3264
:
283
142
          
(VT == MVT::f64) 78
?
RTLIB::UNE_F6466
:
284
78
          
(VT == MVT::f128) 12
?
RTLIB::UNE_F12811
:
RTLIB::UNE_PPCF1281
;
285
142
    break;
286
142
  case ISD::SETGE:
287
34
  case ISD::SETOGE:
288
34
    LC1 = (VT == MVT::f32) ? 
RTLIB::OGE_F3211
:
289
34
          
(VT == MVT::f64) 23
?
RTLIB::OGE_F6411
:
290
23
          
(VT == MVT::f128) 12
?
RTLIB::OGE_F12811
:
RTLIB::OGE_PPCF1281
;
291
34
    break;
292
53
  case ISD::SETLT:
293
53
  case ISD::SETOLT:
294
53
    LC1 = (VT == MVT::f32) ? 
RTLIB::OLT_F3219
:
295
53
          
(VT == MVT::f64) 34
?
RTLIB::OLT_F6415
:
296
34
          
(VT == MVT::f128) 19
?
RTLIB::OLT_F12818
:
RTLIB::OLT_PPCF1281
;
297
53
    break;
298
54
  case ISD::SETLE:
299
54
  case ISD::SETOLE:
300
54
    LC1 = (VT == MVT::f32) ? 
RTLIB::OLE_F3221
:
301
54
          
(VT == MVT::f64) 33
?
RTLIB::OLE_F6421
:
302
33
          
(VT == MVT::f128) 12
?
RTLIB::OLE_F12811
:
RTLIB::OLE_PPCF1281
;
303
54
    break;
304
54
  case ISD::SETGT:
305
49
  case ISD::SETOGT:
306
49
    LC1 = (VT == MVT::f32) ? 
RTLIB::OGT_F3212
:
307
49
          
(VT == MVT::f64) 37
?
RTLIB::OGT_F6414
:
308
37
          
(VT == MVT::f128) 23
?
RTLIB::OGT_F12822
:
RTLIB::OGT_PPCF1281
;
309
49
    break;
310
199
  case ISD::SETUO:
311
199
    LC1 = (VT == MVT::f32) ? 
RTLIB::UO_F3286
:
312
199
          
(VT == MVT::f64) 113
?
RTLIB::UO_F64107
:
313
113
          
(VT == MVT::f128) 6
?
RTLIB::UO_F1285
:
RTLIB::UO_PPCF1281
;
314
199
    break;
315
96
  case ISD::SETO:
316
96
    LC1 = (VT == MVT::f32) ? 
RTLIB::O_F3243
:
317
96
          
(VT == MVT::f64) 53
?
RTLIB::O_F6452
:
318
53
          
(VT == MVT::f128) 1
?
RTLIB::O_F1281
:
RTLIB::O_PPCF1280
;
319
96
    break;
320
49
  case ISD::SETONE:
321
5
    // SETONE = SETOLT | SETOGT
322
5
    LC1 = (VT == MVT::f32) ? 
RTLIB::OLT_F322
:
323
5
          
(VT == MVT::f64) 3
?
RTLIB::OLT_F643
:
324
3
          
(VT == MVT::f128) 0
?
RTLIB::OLT_F1280
:
RTLIB::OLT_PPCF1280
;
325
5
    LC2 = (VT == MVT::f32) ? 
RTLIB::OGT_F322
:
326
5
          
(VT == MVT::f64) 3
?
RTLIB::OGT_F643
:
327
3
          
(VT == MVT::f128) 0
?
RTLIB::OGT_F1280
:
RTLIB::OGT_PPCF1280
;
328
5
    break;
329
102
  case ISD::SETUEQ:
330
102
    LC1 = (VT == MVT::f32) ? 
RTLIB::UO_F3247
:
331
102
          
(VT == MVT::f64) 55
?
RTLIB::UO_F6454
:
332
55
          
(VT == MVT::f128) 1
?
RTLIB::UO_F1281
:
RTLIB::UO_PPCF1280
;
333
102
    LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F3247
:
334
102
          
(VT == MVT::f64) 55
?
RTLIB::OEQ_F6454
:
335
55
          
(VT == MVT::f128) 1
?
RTLIB::OEQ_F1281
:
RTLIB::OEQ_PPCF1280
;
336
102
    break;
337
82
  default:
338
82
    // Invert CC for unordered comparisons
339
82
    ShouldInvertCC = true;
340
82
    switch (CCCode) {
341
82
    case ISD::SETULT:
342
9
      LC1 = (VT == MVT::f32) ? 
RTLIB::OGE_F326
:
343
9
            
(VT == MVT::f64) 3
?
RTLIB::OGE_F643
:
344
3
            
(VT == MVT::f128) 0
?
RTLIB::OGE_F1280
:
RTLIB::OGE_PPCF1280
;
345
9
      break;
346
82
    case ISD::SETULE:
347
34
      LC1 = (VT == MVT::f32) ? 
RTLIB::OGT_F3214
:
348
34
            
(VT == MVT::f64) 20
?
RTLIB::OGT_F6414
:
349
20
            
(VT == MVT::f128) 6
?
RTLIB::OGT_F1286
:
RTLIB::OGT_PPCF1280
;
350
34
      break;
351
82
    case ISD::SETUGT:
352
10
      LC1 = (VT == MVT::f32) ? 
RTLIB::OLE_F326
:
353
10
            
(VT == MVT::f64) 4
?
RTLIB::OLE_F643
:
354
4
            
(VT == MVT::f128) 1
?
RTLIB::OLE_F1281
:
RTLIB::OLE_PPCF1280
;
355
10
      break;
356
82
    case ISD::SETUGE:
357
29
      LC1 = (VT == MVT::f32) ? 
RTLIB::OLT_F3214
:
358
29
            
(VT == MVT::f64) 15
?
RTLIB::OLT_F6414
:
359
15
            
(VT == MVT::f128) 1
?
RTLIB::OLT_F1281
:
RTLIB::OLT_PPCF1280
;
360
29
      break;
361
82
    
default: 0
llvm_unreachable0
("Do not know how to soften this setcc!");
362
1.10k
    }
363
1.10k
  }
364
1.10k
365
1.10k
  // Use the target specific return value for comparions lib calls.
366
1.10k
  EVT RetVT = getCmpLibcallReturnType();
367
1.10k
  SDValue Ops[2] = {NewLHS, NewRHS};
368
1.10k
  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
369
1.10k
                       dl).first;
370
1.10k
  NewRHS = DAG.getConstant(0, dl, RetVT);
371
1.10k
372
1.10k
  CCCode = getCmpLibcallCC(LC1);
373
1.10k
  if (ShouldInvertCC)
374
82
    CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
375
1.10k
376
1.10k
  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
377
107
    SDValue Tmp = DAG.getNode(
378
107
        ISD::SETCC, dl,
379
107
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
380
107
        NewLHS, NewRHS, DAG.getCondCode(CCCode));
381
107
    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
382
107
                         dl).first;
383
107
    NewLHS = DAG.getNode(
384
107
        ISD::SETCC, dl,
385
107
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
386
107
        NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
387
107
    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
388
107
    NewRHS = SDValue();
389
107
  }
390
1.10k
}
391
392
/// Return the entry encoding for a jump table in the current function. The
393
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
394
4.30k
unsigned TargetLowering::getJumpTableEncoding() const {
395
4.30k
  // In non-pic modes, just use the address of a block.
396
4.30k
  if (!isPositionIndependent())
397
169
    return MachineJumpTableInfo::EK_BlockAddress;
398
4.13k
399
4.13k
  // In PIC mode, if the target supports a GPRel32 directive, use it.
400
4.13k
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
401
8
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;
402
4.12k
403
4.12k
  // Otherwise, use a label difference.
404
4.12k
  return MachineJumpTableInfo::EK_LabelDifference32;
405
4.12k
}
406
407
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
408
15
                                                 SelectionDAG &DAG) const {
409
15
  // If our PIC model is GP relative, use the global offset table as the base.
410
15
  unsigned JTEncoding = getJumpTableEncoding();
411
15
412
15
  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
413
15
      
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)10
)
414
9
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
415
6
416
6
  return Table;
417
6
}
418
419
/// This returns the relocation base for the given PIC jumptable, the same as
420
/// getPICJumpTableRelocBase, but as an MCExpr.
421
const MCExpr *
422
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
423
480
                                             unsigned JTI,MCContext &Ctx) const{
424
480
  // The normal PIC reloc base is the label at the start of the jump table.
425
480
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
426
480
}
427
428
bool
429
51.9k
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
430
51.9k
  const TargetMachine &TM = getTargetMachine();
431
51.9k
  const GlobalValue *GV = GA->getGlobal();
432
51.9k
433
51.9k
  // If the address is not even local to this DSO we will have to load it from
434
51.9k
  // a got and then add the offset.
435
51.9k
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
436
28.7k
    return false;
437
23.1k
438
23.1k
  // If the code is position independent we will have to add a base register.
439
23.1k
  if (isPositionIndependent())
440
16.8k
    return false;
441
6.30k
442
6.30k
  // Otherwise we can do it.
443
6.30k
  return true;
444
6.30k
}
445
446
//===----------------------------------------------------------------------===//
447
//  Optimization Methods
448
//===----------------------------------------------------------------------===//
449
450
/// If the specified instruction has a constant integer operand and there are
451
/// bits set in that constant that are not demanded, then clear those bits and
452
/// return true.
453
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
454
1.58M
                                            TargetLoweringOpt &TLO) const {
455
1.58M
  SDLoc DL(Op);
456
1.58M
  unsigned Opcode = Op.getOpcode();
457
1.58M
458
1.58M
  // Do target-specific constant optimization.
459
1.58M
  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
460
28.2k
    return TLO.New.getNode();
461
1.55M
462
1.55M
  // FIXME: ISD::SELECT, ISD::SELECT_CC
463
1.55M
  switch (Opcode) {
464
1.55M
  default:
465
27.5k
    break;
466
1.55M
  case ISD::XOR:
467
1.52M
  case ISD::AND:
468
1.52M
  case ISD::OR: {
469
1.52M
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
470
1.52M
    if (!Op1C)
471
508k
      return false;
472
1.02M
473
1.02M
    // If this is a 'not' op, don't touch it because that's a canonical form.
474
1.02M
    const APInt &C = Op1C->getAPIntValue();
475
1.02M
    if (Opcode == ISD::XOR && 
Demanded.isSubsetOf(C)20.9k
)
476
0
      return false;
477
1.02M
478
1.02M
    if (!C.isSubsetOf(Demanded)) {
479
13.6k
      EVT VT = Op.getValueType();
480
13.6k
      SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
481
13.6k
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
482
13.6k
      return TLO.CombineTo(Op, NewOp);
483
13.6k
    }
484
1.00M
485
1.00M
    break;
486
1.00M
  }
487
1.03M
  }
488
1.03M
489
1.03M
  return false;
490
1.03M
}
491
492
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
493
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
494
/// generalized for targets with other types of implicit widening casts.
495
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
496
                                      const APInt &Demanded,
497
5.91M
                                      TargetLoweringOpt &TLO) const {
498
5.91M
  assert(Op.getNumOperands() == 2 &&
499
5.91M
         "ShrinkDemandedOp only supports binary operators!");
500
5.91M
  assert(Op.getNode()->getNumValues() == 1 &&
501
5.91M
         "ShrinkDemandedOp only supports nodes with one result!");
502
5.91M
503
5.91M
  SelectionDAG &DAG = TLO.DAG;
504
5.91M
  SDLoc dl(Op);
505
5.91M
506
5.91M
  // Early return, as this function cannot handle vector types.
507
5.91M
  if (Op.getValueType().isVector())
508
466k
    return false;
509
5.45M
510
5.45M
  // Don't do this if the node has another user, which may require the
511
5.45M
  // full value.
512
5.45M
  if (!Op.getNode()->hasOneUse())
513
479k
    return false;
514
4.97M
515
4.97M
  // Search for the smallest integer type with free casts to and from
516
4.97M
  // Op's type. For expedience, just check power-of-2 integer types.
517
4.97M
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
518
4.97M
  unsigned DemandedSize = Demanded.getActiveBits();
519
4.97M
  unsigned SmallVTBits = DemandedSize;
520
4.97M
  if (!isPowerOf2_32(SmallVTBits))
521
101k
    SmallVTBits = NextPowerOf2(SmallVTBits);
522
5.11M
  for (; SmallVTBits < BitWidth; 
SmallVTBits = NextPowerOf2(SmallVTBits)141k
) {
523
149k
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
524
149k
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
525
149k
        
TLI.isZExtFree(SmallVT, Op.getValueType())68.0k
) {
526
7.45k
      // We found a type with free casts.
527
7.45k
      SDValue X = DAG.getNode(
528
7.45k
          Op.getOpcode(), dl, SmallVT,
529
7.45k
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
530
7.45k
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
531
7.45k
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
532
7.45k
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
533
7.45k
      return TLO.CombineTo(Op, Z);
534
7.45k
    }
535
149k
  }
536
4.97M
  
return false4.96M
;
537
4.97M
}
538
539
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
540
95.9k
                                          DAGCombinerInfo &DCI) const {
541
95.9k
  SelectionDAG &DAG = DCI.DAG;
542
95.9k
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
543
95.9k
                        !DCI.isBeforeLegalizeOps());
544
95.9k
  KnownBits Known;
545
95.9k
546
95.9k
  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
547
95.9k
  if (Simplified) {
548
2.13k
    DCI.AddToWorklist(Op.getNode());
549
2.13k
    DCI.CommitTargetLoweringOpt(TLO);
550
2.13k
  }
551
95.9k
  return Simplified;
552
95.9k
}
553
554
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
555
                                          KnownBits &Known,
556
                                          TargetLoweringOpt &TLO,
557
                                          unsigned Depth,
558
1.34M
                                          bool AssumeSingleUse) const {
559
1.34M
  EVT VT = Op.getValueType();
560
1.34M
  APInt DemandedElts = VT.isVector()
561
1.34M
                           ? 
APInt::getAllOnesValue(VT.getVectorNumElements())346k
562
1.34M
                           : 
APInt(1, 1)997k
;
563
1.34M
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
564
1.34M
                              AssumeSingleUse);
565
1.34M
}
566
567
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
568
// TODO: Under what circumstances can we create nodes? BITCAST? Constant?
569
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
570
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
571
420k
    SelectionDAG &DAG, unsigned Depth) const {
572
420k
  KnownBits LHSKnown, RHSKnown;
573
420k
  switch (Op.getOpcode()) {
574
420k
  case ISD::AND: {
575
14.0k
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
576
14.0k
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
577
14.0k
578
14.0k
    // If all of the demanded bits are known 1 on one side, return the other.
579
14.0k
    // These bits cannot contribute to the result of the 'and' in this
580
14.0k
    // context.
581
14.0k
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
582
141
      return Op.getOperand(0);
583
13.9k
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
584
0
      return Op.getOperand(1);
585
13.9k
    break;
586
13.9k
  }
587
13.9k
  case ISD::OR: {
588
9.97k
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
589
9.97k
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
590
9.97k
591
9.97k
    // If all of the demanded bits are known zero on one side, return the
592
9.97k
    // other.  These bits cannot contribute to the result of the 'or' in this
593
9.97k
    // context.
594
9.97k
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
595
38
      return Op.getOperand(0);
596
9.93k
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
597
17
      return Op.getOperand(1);
598
9.91k
    break;
599
9.91k
  }
600
9.91k
  case ISD::XOR: {
601
6.49k
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
602
6.49k
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
603
6.49k
604
6.49k
    // If all of the demanded bits are known zero on one side, return the
605
6.49k
    // other.
606
6.49k
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
607
1
      return Op.getOperand(0);
608
6.48k
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
609
0
      return Op.getOperand(1);
610
6.48k
    break;
611
6.48k
  }
612
6.48k
  case ISD::VECTOR_SHUFFLE: {
613
3.36k
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
614
3.36k
615
3.36k
    // If all the demanded elts are from one operand and are inline,
616
3.36k
    // then we can use the operand directly.
617
3.36k
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
618
21.2k
    for (unsigned i = 0, NumElts = ShuffleMask.size(); i != NumElts; 
++i17.8k
) {
619
17.8k
      int M = ShuffleMask[i];
620
17.8k
      if (M < 0 || 
!DemandedElts[i]17.0k
)
621
6.43k
        continue;
622
11.4k
      AllUndef = false;
623
11.4k
      IdentityLHS &= (M == (int)i);
624
11.4k
      IdentityRHS &= ((M - NumElts) == i);
625
11.4k
    }
626
3.36k
627
3.36k
    if (AllUndef)
628
0
      return DAG.getUNDEF(Op.getValueType());
629
3.36k
    if (IdentityLHS)
630
1.01k
      return Op.getOperand(0);
631
2.34k
    if (IdentityRHS)
632
0
      return Op.getOperand(1);
633
2.34k
    break;
634
2.34k
  }
635
386k
  default:
636
386k
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
637
37.8k
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
638
0
              Op, DemandedBits, DemandedElts, DAG, Depth))
639
0
        return V;
640
386k
    break;
641
419k
  }
642
419k
  return SDValue();
643
419k
}
644
645
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
646
/// result of Op are ever used downstream. If we can use this information to
647
/// simplify Op, create a new simplified DAG node and return true, returning the
648
/// original and new nodes in Old and New. Otherwise, analyze the expression and
649
/// return a mask of Known bits for the expression (used to simplify the
650
/// caller).  The Known bits may only be accurate for those bits in the
651
/// OriginalDemandedBits and OriginalDemandedElts.
652
bool TargetLowering::SimplifyDemandedBits(
653
    SDValue Op, const APInt &OriginalDemandedBits,
654
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
655
21.1M
    unsigned Depth, bool AssumeSingleUse) const {
656
21.1M
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
657
21.1M
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
658
21.1M
         "Mask size mismatches value type size!");
659
21.1M
660
21.1M
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
661
21.1M
  assert((!Op.getValueType().isVector() ||
662
21.1M
          NumElts == Op.getValueType().getVectorNumElements()) &&
663
21.1M
         "Unexpected vector size");
664
21.1M
665
21.1M
  APInt DemandedBits = OriginalDemandedBits;
666
21.1M
  APInt DemandedElts = OriginalDemandedElts;
667
21.1M
  SDLoc dl(Op);
668
21.1M
  auto &DL = TLO.DAG.getDataLayout();
669
21.1M
670
21.1M
  // Don't know anything.
671
21.1M
  Known = KnownBits(BitWidth);
672
21.1M
673
21.1M
  // Undef operand.
674
21.1M
  if (Op.isUndef())
675
19.6k
    return false;
676
21.1M
677
21.1M
  if (Op.getOpcode() == ISD::Constant) {
678
3.75M
    // We know all of the bits for a constant!
679
3.75M
    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
680
3.75M
    Known.Zero = ~Known.One;
681
3.75M
    return false;
682
3.75M
  }
683
17.4M
684
17.4M
  // Other users may use these bits.
685
17.4M
  EVT VT = Op.getValueType();
686
17.4M
  if (!Op.getNode()->hasOneUse() && 
!AssumeSingleUse5.73M
) {
687
5.58M
    if (Depth != 0) {
688
4.78M
      // If not at the root, just compute the Known bits to
689
4.78M
      // simplify things downstream.
690
4.78M
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
691
4.78M
      return false;
692
4.78M
    }
693
800k
    // If this is the root being simplified, allow it to have multiple uses,
694
800k
    // just set the DemandedBits/Elts to all bits.
695
800k
    DemandedBits = APInt::getAllOnesValue(BitWidth);
696
800k
    DemandedElts = APInt::getAllOnesValue(NumElts);
697
11.8M
  } else if (OriginalDemandedBits == 0 || 
OriginalDemandedElts == 011.8M
) {
698
3.80k
    // Not demanding any bits/elts from Op.
699
3.80k
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
700
11.8M
  } else if (Depth == 6) { // Limit search depth.
701
121k
    return false;
702
121k
  }
703
12.5M
704
12.5M
  KnownBits Known2, KnownOut;
705
12.5M
  switch (Op.getOpcode()) {
706
12.5M
  case ISD::SCALAR_TO_VECTOR: {
707
33.1k
    if (!DemandedElts[0])
708
0
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
709
33.1k
710
33.1k
    KnownBits SrcKnown;
711
33.1k
    SDValue Src = Op.getOperand(0);
712
33.1k
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
713
33.1k
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
714
33.1k
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
715
62
      return true;
716
33.0k
    Known = SrcKnown.zextOrTrunc(BitWidth, false);
717
33.0k
    break;
718
33.0k
  }
719
147k
  case ISD::BUILD_VECTOR:
720
147k
    // Collect the known bits that are shared by every demanded element.
721
147k
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
722
147k
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
723
147k
    return false; // Don't fall through, will infinitely loop.
724
677k
  case ISD::LOAD: {
725
677k
    LoadSDNode *LD = cast<LoadSDNode>(Op);
726
677k
    if (getTargetConstantFromLoad(LD)) {
727
33.1k
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
728
33.1k
      return false; // Don't fall through, will infinitely loop.
729
33.1k
    }
730
644k
    break;
731
644k
  }
732
644k
  case ISD::INSERT_VECTOR_ELT: {
733
3.60k
    SDValue Vec = Op.getOperand(0);
734
3.60k
    SDValue Scl = Op.getOperand(1);
735
3.60k
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
736
3.60k
    EVT VecVT = Vec.getValueType();
737
3.60k
738
3.60k
    // If index isn't constant, assume we need all vector elements AND the
739
3.60k
    // inserted element.
740
3.60k
    APInt DemandedVecElts(DemandedElts);
741
3.60k
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
742
3.60k
      unsigned Idx = CIdx->getZExtValue();
743
3.60k
      DemandedVecElts.clearBit(Idx);
744
3.60k
745
3.60k
      // Inserted element is not required.
746
3.60k
      if (!DemandedElts[Idx])
747
9
        return TLO.CombineTo(Op, Vec);
748
3.60k
    }
749
3.60k
750
3.60k
    KnownBits KnownScl;
751
3.60k
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
752
3.60k
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
753
3.60k
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
754
72
      return true;
755
3.52k
756
3.52k
    Known = KnownScl.zextOrTrunc(BitWidth, false);
757
3.52k
758
3.52k
    KnownBits KnownVec;
759
3.52k
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
760
3.52k
                             Depth + 1))
761
32
      return true;
762
3.49k
763
3.49k
    if (!!DemandedVecElts) {
764
2.50k
      Known.One &= KnownVec.One;
765
2.50k
      Known.Zero &= KnownVec.Zero;
766
2.50k
    }
767
3.49k
768
3.49k
    return false;
769
3.49k
  }
770
8.12k
  case ISD::INSERT_SUBVECTOR: {
771
8.12k
    SDValue Base = Op.getOperand(0);
772
8.12k
    SDValue Sub = Op.getOperand(1);
773
8.12k
    EVT SubVT = Sub.getValueType();
774
8.12k
    unsigned NumSubElts = SubVT.getVectorNumElements();
775
8.12k
776
8.12k
    // If index isn't constant, assume we need the original demanded base
777
8.12k
    // elements and ALL the inserted subvector elements.
778
8.12k
    APInt BaseElts = DemandedElts;
779
8.12k
    APInt SubElts = APInt::getAllOnesValue(NumSubElts);
780
8.12k
    if (isa<ConstantSDNode>(Op.getOperand(2))) {
781
8.12k
      const APInt &Idx = Op.getConstantOperandAPInt(2);
782
8.12k
      if (Idx.ule(NumElts - NumSubElts)) {
783
8.12k
        unsigned SubIdx = Idx.getZExtValue();
784
8.12k
        SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
785
8.12k
        BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
786
8.12k
      }
787
8.12k
    }
788
8.12k
789
8.12k
    KnownBits KnownSub, KnownBase;
790
8.12k
    if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
791
8.12k
                             Depth + 1))
792
23
      return true;
793
8.10k
    if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
794
8.10k
                             Depth + 1))
795
0
      return true;
796
8.10k
797
8.10k
    Known.Zero.setAllBits();
798
8.10k
    Known.One.setAllBits();
799
8.10k
    if (!!SubElts) {
800
8.10k
        Known.One &= KnownSub.One;
801
8.10k
        Known.Zero &= KnownSub.Zero;
802
8.10k
    }
803
8.10k
    if (!!BaseElts) {
804
6.35k
        Known.One &= KnownBase.One;
805
6.35k
        Known.Zero &= KnownBase.Zero;
806
6.35k
    }
807
8.10k
    break;
808
8.10k
  }
809
8.10k
  case ISD::CONCAT_VECTORS: {
810
7.39k
    Known.Zero.setAllBits();
811
7.39k
    Known.One.setAllBits();
812
7.39k
    EVT SubVT = Op.getOperand(0).getValueType();
813
7.39k
    unsigned NumSubVecs = Op.getNumOperands();
814
7.39k
    unsigned NumSubElts = SubVT.getVectorNumElements();
815
25.5k
    for (unsigned i = 0; i != NumSubVecs; 
++i18.1k
) {
816
18.4k
      APInt DemandedSubElts =
817
18.4k
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
818
18.4k
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
819
18.4k
                               Known2, TLO, Depth + 1))
820
317
        return true;
821
18.1k
      // Known bits are shared by every demanded subvector element.
822
18.1k
      if (!!DemandedSubElts) {
823
16.3k
        Known.One &= Known2.One;
824
16.3k
        Known.Zero &= Known2.Zero;
825
16.3k
      }
826
18.1k
    }
827
7.39k
    
break7.07k
;
828
7.39k
  }
829
51.8k
  case ISD::VECTOR_SHUFFLE: {
830
51.8k
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
831
51.8k
832
51.8k
    // Collect demanded elements from shuffle operands.
833
51.8k
    APInt DemandedLHS(NumElts, 0);
834
51.8k
    APInt DemandedRHS(NumElts, 0);
835
442k
    for (unsigned i = 0; i != NumElts; 
++i390k
) {
836
403k
      if (!DemandedElts[i])
837
128k
        continue;
838
275k
      int M = ShuffleMask[i];
839
275k
      if (M < 0) {
840
13.1k
        // For UNDEF elements, we don't know anything about the common state of
841
13.1k
        // the shuffle result.
842
13.1k
        DemandedLHS.clearAllBits();
843
13.1k
        DemandedRHS.clearAllBits();
844
13.1k
        break;
845
13.1k
      }
846
262k
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
847
262k
      if (M < (int)NumElts)
848
238k
        DemandedLHS.setBit(M);
849
24.0k
      else
850
24.0k
        DemandedRHS.setBit(M - NumElts);
851
262k
    }
852
51.8k
853
51.8k
    if (!!DemandedLHS || 
!!DemandedRHS14.2k
) {
854
38.7k
      Known.Zero.setAllBits();
855
38.7k
      Known.One.setAllBits();
856
38.7k
      if (!!DemandedLHS) {
857
37.6k
        if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
858
37.6k
                                 Known2, TLO, Depth + 1))
859
181
          return true;
860
37.4k
        Known.One &= Known2.One;
861
37.4k
        Known.Zero &= Known2.Zero;
862
37.4k
      }
863
38.7k
      
if (38.5k
!!DemandedRHS38.5k
) {
864
6.45k
        if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
865
6.45k
                                 Known2, TLO, Depth + 1))
866
21
          return true;
867
6.42k
        Known.One &= Known2.One;
868
6.42k
        Known.Zero &= Known2.Zero;
869
6.42k
      }
870
38.5k
    }
871
51.8k
    
break51.6k
;
872
51.8k
  }
873
973k
  case ISD::AND: {
874
973k
    SDValue Op0 = Op.getOperand(0);
875
973k
    SDValue Op1 = Op.getOperand(1);
876
973k
877
973k
    // If the RHS is a constant, check to see if the LHS would be zero without
878
973k
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
879
973k
    // simplify the LHS, here we're using information from the LHS to simplify
880
973k
    // the RHS.
881
973k
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
882
788k
      // Do not increment Depth here; that can cause an infinite loop.
883
788k
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
884
788k
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
885
788k
      if ((LHSKnown.Zero & DemandedBits) ==
886
788k
          (~RHSC->getAPIntValue() & DemandedBits))
887
280k
        return TLO.CombineTo(Op, Op0);
888
507k
889
507k
      // If any of the set bits in the RHS are known zero on the LHS, shrink
890
507k
      // the constant.
891
507k
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
892
13.8k
        return true;
893
493k
894
493k
      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
895
493k
      // constant, but if this 'and' is only clearing bits that were just set by
896
493k
      // the xor, then this 'and' can be eliminated by shrinking the mask of
897
493k
      // the xor. For example, for a 32-bit X:
898
493k
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
899
493k
      if (isBitwiseNot(Op0) && 
Op0.hasOneUse()3.95k
&&
900
493k
          
LHSKnown.One == ~RHSC->getAPIntValue()3.85k
) {
901
20
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
902
20
        return TLO.CombineTo(Op, Xor);
903
20
      }
904
678k
    }
905
678k
906
678k
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
907
678k
                             Depth + 1))
908
1.21k
      return true;
909
677k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
910
677k
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
911
677k
                             Known2, TLO, Depth + 1))
912
11.9k
      return true;
913
665k
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
914
665k
915
665k
    // Attempt to avoid multi-use ops if we don't need anything from them.
916
665k
    if (!DemandedBits.isAllOnesValue() || 
!DemandedElts.isAllOnesValue()595k
) {
917
70.0k
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
918
70.0k
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
919
70.0k
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
920
70.0k
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
921
70.0k
      if (DemandedOp0 || 
DemandedOp169.5k
) {
922
544
        Op0 = DemandedOp0 ? 
DemandedOp0483
:
Op061
;
923
544
        Op1 = DemandedOp1 ? 
DemandedOp161
:
Op1483
;
924
544
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
925
544
        return TLO.CombineTo(Op, NewOp);
926
544
      }
927
664k
    }
928
664k
929
664k
    // If all of the demanded bits are known one on one side, return the other.
930
664k
    // These bits cannot contribute to the result of the 'and'.
931
664k
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
932
998
      return TLO.CombineTo(Op, Op0);
933
663k
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
934
2.97k
      return TLO.CombineTo(Op, Op1);
935
660k
    // If all of the demanded bits in the inputs are known zeros, return zero.
936
660k
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
937
2
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
938
660k
    // If the RHS is a constant, see if we can simplify it.
939
660k
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
940
0
      return true;
941
660k
    // If the operation can be done in a smaller type, do so.
942
660k
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
943
2.42k
      return true;
944
658k
945
658k
    // Output known-1 bits are only known if set in both the LHS & RHS.
946
658k
    Known.One &= Known2.One;
947
658k
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
948
658k
    Known.Zero |= Known2.Zero;
949
658k
    break;
950
658k
  }
951
658k
  case ISD::OR: {
952
380k
    SDValue Op0 = Op.getOperand(0);
953
380k
    SDValue Op1 = Op.getOperand(1);
954
380k
955
380k
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
956
380k
                             Depth + 1))
957
7.29k
      return true;
958
372k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
959
372k
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
960
372k
                             Known2, TLO, Depth + 1))
961
7.55k
      return true;
962
365k
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
963
365k
964
365k
    // Attempt to avoid multi-use ops if we don't need anything from them.
965
365k
    if (!DemandedBits.isAllOnesValue() || 
!DemandedElts.isAllOnesValue()344k
) {
966
21.6k
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
967
21.6k
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
968
21.6k
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
969
21.6k
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
970
21.6k
      if (DemandedOp0 || 
DemandedOp121.6k
) {
971
487
        Op0 = DemandedOp0 ? 
DemandedOp011
:
Op0476
;
972
487
        Op1 = DemandedOp1 ? 
DemandedOp1476
:
Op111
;
973
487
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
974
487
        return TLO.CombineTo(Op, NewOp);
975
487
      }
976
364k
    }
977
364k
978
364k
    // If all of the demanded bits are known zero on one side, return the other.
979
364k
    // These bits cannot contribute to the result of the 'or'.
980
364k
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
981
986
      return TLO.CombineTo(Op, Op0);
982
363k
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
983
1.06k
      return TLO.CombineTo(Op, Op1);
984
362k
    // If the RHS is a constant, see if we can simplify it.
985
362k
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
986
81
      return true;
987
362k
    // If the operation can be done in a smaller type, do so.
988
362k
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
989
263
      return true;
990
362k
991
362k
    // Output known-0 bits are only known if clear in both the LHS & RHS.
992
362k
    Known.Zero &= Known2.Zero;
993
362k
    // Output known-1 are known to be set if set in either the LHS | RHS.
994
362k
    Known.One |= Known2.One;
995
362k
    break;
996
362k
  }
997
362k
  case ISD::XOR: {
998
222k
    SDValue Op0 = Op.getOperand(0);
999
222k
    SDValue Op1 = Op.getOperand(1);
1000
222k
1001
222k
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1002
222k
                             Depth + 1))
1003
738
      return true;
1004
221k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1005
221k
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1006
221k
                             Depth + 1))
1007
184
      return true;
1008
221k
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1009
221k
1010
221k
    // Attempt to avoid multi-use ops if we don't need anything from them.
1011
221k
    if (!DemandedBits.isAllOnesValue() || 
!DemandedElts.isAllOnesValue()195k
) {
1012
26.3k
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1013
26.3k
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1014
26.3k
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1015
26.3k
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1016
26.3k
      if (DemandedOp0 || 
DemandedOp126.3k
) {
1017
4
        Op0 = DemandedOp0 ? DemandedOp0 : 
Op00
;
1018
4
        Op1 = DemandedOp1 ? 
DemandedOp13
:
Op11
;
1019
4
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1020
4
        return TLO.CombineTo(Op, NewOp);
1021
4
      }
1022
221k
    }
1023
221k
1024
221k
    // If all of the demanded bits are known zero on one side, return the other.
1025
221k
    // These bits cannot contribute to the result of the 'xor'.
1026
221k
    if (DemandedBits.isSubsetOf(Known.Zero))
1027
35
      return TLO.CombineTo(Op, Op0);
1028
221k
    if (DemandedBits.isSubsetOf(Known2.Zero))
1029
18
      return TLO.CombineTo(Op, Op1);
1030
221k
    // If the operation can be done in a smaller type, do so.
1031
221k
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1032
212
      return true;
1033
221k
1034
221k
    // If all of the unknown bits are known to be zero on one side or the other
1035
221k
    // (but not both) turn this into an *inclusive* or.
1036
221k
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1037
221k
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1038
124
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1039
221k
1040
221k
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
1041
221k
    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1042
221k
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
1043
221k
    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1044
221k
1045
221k
    if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1046
149k
      // If one side is a constant, and all of the known set bits on the other
1047
149k
      // side are also set in the constant, turn this into an AND, as we know
1048
149k
      // the bits will be cleared.
1049
149k
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1050
149k
      // NB: it is okay if more bits are known than are requested
1051
149k
      if (C->getAPIntValue() == Known2.One) {
1052
11
        SDValue ANDC =
1053
11
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1054
11
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1055
11
      }
1056
149k
1057
149k
      // If the RHS is a constant, see if we can change it. Don't alter a -1
1058
149k
      // constant because that's a 'not' op, and that is better for combining
1059
149k
      // and codegen.
1060
149k
      if (!C->isAllOnesValue()) {
1061
27.2k
        if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1062
154
          // We're flipping all demanded bits. Flip the undemanded bits too.
1063
154
          SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1064
154
          return TLO.CombineTo(Op, New);
1065
154
        }
1066
27.1k
        // If we can't turn this into a 'not', try to shrink the constant.
1067
27.1k
        if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1068
167
          return true;
1069
220k
      }
1070
149k
    }
1071
220k
1072
220k
    Known = std::move(KnownOut);
1073
220k
    break;
1074
220k
  }
1075
220k
  case ISD::SELECT:
1076
16.5k
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1077
16.5k
                             Depth + 1))
1078
49
      return true;
1079
16.4k
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1080
16.4k
                             Depth + 1))
1081
61
      return true;
1082
16.4k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1083
16.4k
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1084
16.4k
1085
16.4k
    // If the operands are constants, see if we can simplify them.
1086
16.4k
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1087
0
      return true;
1088
16.4k
1089
16.4k
    // Only known if known in both the LHS and RHS.
1090
16.4k
    Known.One &= Known2.One;
1091
16.4k
    Known.Zero &= Known2.Zero;
1092
16.4k
    break;
1093
16.4k
  case ISD::SELECT_CC:
1094
11.1k
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1095
11.1k
                             Depth + 1))
1096
64
      return true;
1097
11.0k
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1098
11.0k
                             Depth + 1))
1099
4
      return true;
1100
11.0k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1101
11.0k
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1102
11.0k
1103
11.0k
    // If the operands are constants, see if we can simplify them.
1104
11.0k
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1105
0
      return true;
1106
11.0k
1107
11.0k
    // Only known if known in both the LHS and RHS.
1108
11.0k
    Known.One &= Known2.One;
1109
11.0k
    Known.Zero &= Known2.Zero;
1110
11.0k
    break;
1111
156k
  case ISD::SETCC: {
1112
156k
    SDValue Op0 = Op.getOperand(0);
1113
156k
    SDValue Op1 = Op.getOperand(1);
1114
156k
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1115
156k
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
1116
156k
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1117
156k
    // -1, we may be able to bypass the setcc.
1118
156k
    if (DemandedBits.isSignMask() &&
1119
156k
        
Op0.getScalarValueSizeInBits() == BitWidth77.4k
&&
1120
156k
        getBooleanContents(VT) ==
1121
3.69k
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
1122
3.69k
      // If we're testing X < 0, then this compare isn't needed - just use X!
1123
3.69k
      // FIXME: We're limiting to integer types here, but this should also work
1124
3.69k
      // if we don't care about FP signed-zero. The use of SETLT with FP means
1125
3.69k
      // that we don't care about NaNs.
1126
3.69k
      if (CC == ISD::SETLT && 
Op1.getValueType().isInteger()422
&&
1127
3.69k
          
(420
isNullConstant(Op1)420
||
ISD::isBuildVectorAllZeros(Op1.getNode())420
))
1128
391
        return TLO.CombineTo(Op, Op0);
1129
156k
1130
156k
      // TODO: Should we check for other forms of sign-bit comparisons?
1131
156k
      // Examples: X <= -1, X >= 0
1132
156k
    }
1133
156k
    if (getBooleanContents(Op0.getValueType()) ==
1134
156k
            TargetLowering::ZeroOrOneBooleanContent &&
1135
156k
        
BitWidth > 159.6k
)
1136
23.5k
      Known.Zero.setBitsFrom(1);
1137
156k
    break;
1138
156k
  }
1139
651k
  case ISD::SHL: {
1140
651k
    SDValue Op0 = Op.getOperand(0);
1141
651k
    SDValue Op1 = Op.getOperand(1);
1142
651k
1143
651k
    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1144
615k
      // If the shift count is an invalid immediate, don't do anything.
1145
615k
      if (SA->getAPIntValue().uge(BitWidth))
1146
0
        break;
1147
615k
1148
615k
      unsigned ShAmt = SA->getZExtValue();
1149
615k
      if (ShAmt == 0)
1150
0
        return TLO.CombineTo(Op, Op0);
1151
615k
1152
615k
      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1153
615k
      // single shift.  We can do this if the bottom bits (which are shifted
1154
615k
      // out) are never demanded.
1155
615k
      // TODO - support non-uniform vector amounts.
1156
615k
      if (Op0.getOpcode() == ISD::SRL) {
1157
21.2k
        if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
1158
1.19k
          if (ConstantSDNode *SA2 =
1159
1.13k
                  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1160
1.13k
            if (SA2->getAPIntValue().ult(BitWidth)) {
1161
1.13k
              unsigned C1 = SA2->getZExtValue();
1162
1.13k
              unsigned Opc = ISD::SHL;
1163
1.13k
              int Diff = ShAmt - C1;
1164
1.13k
              if (Diff < 0) {
1165
135
                Diff = -Diff;
1166
135
                Opc = ISD::SRL;
1167
135
              }
1168
1.13k
1169
1.13k
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
1170
1.13k
              return TLO.CombineTo(
1171
1.13k
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1172
1.13k
            }
1173
613k
          }
1174
1.19k
        }
1175
21.2k
      }
1176
613k
1177
613k
      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
1178
613k
                               Known, TLO, Depth + 1))
1179
14.7k
        return true;
1180
599k
1181
599k
      // Try shrinking the operation as long as the shift amount will still be
1182
599k
      // in range.
1183
599k
      if ((ShAmt < DemandedBits.getActiveBits()) &&
1184
599k
          
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)598k
)
1185
460
        return true;
1186
598k
1187
598k
      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1188
598k
      // are not demanded. This will likely allow the anyext to be folded away.
1189
598k
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1190
24.3k
        SDValue InnerOp = Op0.getOperand(0);
1191
24.3k
        EVT InnerVT = InnerOp.getValueType();
1192
24.3k
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
1193
24.3k
        if (ShAmt < InnerBits && 
DemandedBits.getActiveBits() <= InnerBits358
&&
1194
24.3k
            
isTypeDesirableForOp(ISD::SHL, InnerVT)65
) {
1195
4
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
1196
4
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1197
0
            ShTy = InnerVT;
1198
4
          SDValue NarrowShl =
1199
4
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1200
4
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
1201
4
          return TLO.CombineTo(
1202
4
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1203
4
        }
1204
24.3k
        // Repeat the SHL optimization above in cases where an extension
1205
24.3k
        // intervenes: (shl (anyext (shr x, c1)), c2) to
1206
24.3k
        // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1207
24.3k
        // aren't demanded (as above) and that the shifted upper c1 bits of
1208
24.3k
        // x aren't demanded.
1209
24.3k
        if (Op0.hasOneUse() && 
InnerOp.getOpcode() == ISD::SRL23.6k
&&
1210
24.3k
            
InnerOp.hasOneUse()812
) {
1211
810
          if (ConstantSDNode *SA2 =
1212
628
                  isConstOrConstSplat(InnerOp.getOperand(1))) {
1213
628
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
1214
628
            if (InnerShAmt < ShAmt && 
InnerShAmt < InnerBits607
&&
1215
628
                DemandedBits.getActiveBits() <=
1216
607
                    (InnerBits - InnerShAmt + ShAmt) &&
1217
628
                
DemandedBits.countTrailingZeros() >= ShAmt21
) {
1218
2
              SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
1219
2
                                                  Op1.getValueType());
1220
2
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1221
2
                                               InnerOp.getOperand(0));
1222
2
              return TLO.CombineTo(
1223
2
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1224
2
            }
1225
598k
          }
1226
810
        }
1227
24.3k
      }
1228
598k
1229
598k
      Known.Zero <<= ShAmt;
1230
598k
      Known.One <<= ShAmt;
1231
598k
      // low bits known zero.
1232
598k
      Known.Zero.setLowBits(ShAmt);
1233
598k
    }
1234
651k
    
break635k
;
1235
651k
  }
1236
651k
  case ISD::SRL: {
1237
518k
    SDValue Op0 = Op.getOperand(0);
1238
518k
    SDValue Op1 = Op.getOperand(1);
1239
518k
1240
518k
    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1241
503k
      // If the shift count is an invalid immediate, don't do anything.
1242
503k
      if (SA->getAPIntValue().uge(BitWidth))
1243
0
        break;
1244
503k
1245
503k
      unsigned ShAmt = SA->getZExtValue();
1246
503k
      if (ShAmt == 0)
1247
0
        return TLO.CombineTo(Op, Op0);
1248
503k
1249
503k
      EVT ShiftVT = Op1.getValueType();
1250
503k
      APInt InDemandedMask = (DemandedBits << ShAmt);
1251
503k
1252
503k
      // If the shift is exact, then it does demand the low bits (and knows that
1253
503k
      // they are zero).
1254
503k
      if (Op->getFlags().hasExact())
1255
5.45k
        InDemandedMask.setLowBits(ShAmt);
1256
503k
1257
503k
      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1258
503k
      // single shift.  We can do this if the top bits (which are shifted out)
1259
503k
      // are never demanded.
1260
503k
      // TODO - support non-uniform vector amounts.
1261
503k
      if (Op0.getOpcode() == ISD::SHL) {
1262
4.49k
        if (ConstantSDNode *SA2 =
1263
1.83k
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1264
1.83k
          if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
1265
1.17k
            if (SA2->getAPIntValue().ult(BitWidth)) {
1266
1.17k
              unsigned C1 = SA2->getZExtValue();
1267
1.17k
              unsigned Opc = ISD::SRL;
1268
1.17k
              int Diff = ShAmt - C1;
1269
1.17k
              if (Diff < 0) {
1270
22
                Diff = -Diff;
1271
22
                Opc = ISD::SHL;
1272
22
              }
1273
1.17k
1274
1.17k
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1275
1.17k
              return TLO.CombineTo(
1276
1.17k
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1277
1.17k
            }
1278
502k
          }
1279
1.83k
        }
1280
4.49k
      }
1281
502k
1282
502k
      // Compute the new bits that are at the top now.
1283
502k
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1284
502k
                               Depth + 1))
1285
14.6k
        return true;
1286
487k
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1287
487k
      Known.Zero.lshrInPlace(ShAmt);
1288
487k
      Known.One.lshrInPlace(ShAmt);
1289
487k
1290
487k
      Known.Zero.setHighBits(ShAmt); // High bits known zero.
1291
487k
    }
1292
518k
    
break503k
;
1293
518k
  }
1294
518k
  case ISD::SRA: {
1295
57.7k
    SDValue Op0 = Op.getOperand(0);
1296
57.7k
    SDValue Op1 = Op.getOperand(1);
1297
57.7k
1298
57.7k
    // If this is an arithmetic shift right and only the low-bit is set, we can
1299
57.7k
    // always convert this into a logical shr, even if the shift amount is
1300
57.7k
    // variable.  The low bit of the shift cannot be an input sign bit unless
1301
57.7k
    // the shift amount is >= the size of the datatype, which is undefined.
1302
57.7k
    if (DemandedBits.isOneValue())
1303
33
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1304
57.6k
1305
57.6k
    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1306
55.3k
      // If the shift count is an invalid immediate, don't do anything.
1307
55.3k
      if (SA->getAPIntValue().uge(BitWidth))
1308
0
        break;
1309
55.3k
1310
55.3k
      unsigned ShAmt = SA->getZExtValue();
1311
55.3k
      if (ShAmt == 0)
1312
0
        return TLO.CombineTo(Op, Op0);
1313
55.3k
1314
55.3k
      APInt InDemandedMask = (DemandedBits << ShAmt);
1315
55.3k
1316
55.3k
      // If the shift is exact, then it does demand the low bits (and knows that
1317
55.3k
      // they are zero).
1318
55.3k
      if (Op->getFlags().hasExact())
1319
8.59k
        InDemandedMask.setLowBits(ShAmt);
1320
55.3k
1321
55.3k
      // If any of the demanded bits are produced by the sign extension, we also
1322
55.3k
      // demand the input sign bit.
1323
55.3k
      if (DemandedBits.countLeadingZeros() < ShAmt)
1324
54.5k
        InDemandedMask.setSignBit();
1325
55.3k
1326
55.3k
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1327
55.3k
                               Depth + 1))
1328
2.73k
        return true;
1329
52.6k
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1330
52.6k
      Known.Zero.lshrInPlace(ShAmt);
1331
52.6k
      Known.One.lshrInPlace(ShAmt);
1332
52.6k
1333
52.6k
      // If the input sign bit is known to be zero, or if none of the top bits
1334
52.6k
      // are demanded, turn this into an unsigned shift right.
1335
52.6k
      if (Known.Zero[BitWidth - ShAmt - 1] ||
1336
52.6k
          
DemandedBits.countLeadingZeros() >= ShAmt52.3k
) {
1337
1.04k
        SDNodeFlags Flags;
1338
1.04k
        Flags.setExact(Op->getFlags().hasExact());
1339
1.04k
        return TLO.CombineTo(
1340
1.04k
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1341
1.04k
      }
1342
51.5k
1343
51.5k
      int Log2 = DemandedBits.exactLogBase2();
1344
51.5k
      if (Log2 >= 0) {
1345
180
        // The bit must come from the sign.
1346
180
        SDValue NewSA =
1347
180
            TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1348
180
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1349
180
      }
1350
51.4k
1351
51.4k
      if (Known.One[BitWidth - ShAmt - 1])
1352
5
        // New bits are known one.
1353
5
        Known.One.setHighBits(ShAmt);
1354
51.4k
    }
1355
57.6k
    
break53.7k
;
1356
57.6k
  }
1357
57.6k
  case ISD::FSHL:
1358
861
  case ISD::FSHR: {
1359
861
    SDValue Op0 = Op.getOperand(0);
1360
861
    SDValue Op1 = Op.getOperand(1);
1361
861
    SDValue Op2 = Op.getOperand(2);
1362
861
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1363
861
1364
861
    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1365
400
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1366
400
1367
400
      // For fshl, 0-shift returns the 1st arg.
1368
400
      // For fshr, 0-shift returns the 2nd arg.
1369
400
      if (Amt == 0) {
1370
0
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1371
0
                                 Known, TLO, Depth + 1))
1372
0
          return true;
1373
0
        break;
1374
0
      }
1375
400
1376
400
      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1377
400
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1378
400
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? 
Amt237
:
(BitWidth - Amt)163
);
1379
400
      APInt Demanded1 = DemandedBits << (IsFSHL ? 
(BitWidth - Amt)237
:
Amt163
);
1380
400
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1381
400
                               Depth + 1))
1382
8
        return true;
1383
392
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1384
392
                               Depth + 1))
1385
4
        return true;
1386
388
1387
388
      Known2.One <<= (IsFSHL ? 
Amt231
:
(BitWidth - Amt)157
);
1388
388
      Known2.Zero <<= (IsFSHL ? 
Amt231
:
(BitWidth - Amt)157
);
1389
388
      Known.One.lshrInPlace(IsFSHL ? 
(BitWidth - Amt)231
:
Amt157
);
1390
388
      Known.Zero.lshrInPlace(IsFSHL ? 
(BitWidth - Amt)231
:
Amt157
);
1391
388
      Known.One |= Known2.One;
1392
388
      Known.Zero |= Known2.Zero;
1393
388
    }
1394
861
    
break849
;
1395
861
  }
1396
861
  case ISD::BITREVERSE: {
1397
106
    SDValue Src = Op.getOperand(0);
1398
106
    APInt DemandedSrcBits = DemandedBits.reverseBits();
1399
106
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1400
106
                             Depth + 1))
1401
12
      return true;
1402
94
    Known.One = Known2.One.reverseBits();
1403
94
    Known.Zero = Known2.Zero.reverseBits();
1404
94
    break;
1405
94
  }
1406
85.8k
  case ISD::SIGN_EXTEND_INREG: {
1407
85.8k
    SDValue Op0 = Op.getOperand(0);
1408
85.8k
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1409
85.8k
    unsigned ExVTBits = ExVT.getScalarSizeInBits();
1410
85.8k
1411
85.8k
    // If we only care about the highest bit, don't bother shifting right.
1412
85.8k
    if (DemandedBits.isSignMask()) {
1413
379
      unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1414
379
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1415
379
      // However if the input is already sign extended we expect the sign
1416
379
      // extension to be dropped altogether later and do not simplify.
1417
379
      if (!AlreadySignExtended) {
1418
374
        // Compute the correct shift amount type, which must be getShiftAmountTy
1419
374
        // for scalar types after legalization.
1420
374
        EVT ShiftAmtTy = VT;
1421
374
        if (TLO.LegalTypes() && 
!ShiftAmtTy.isVector()264
)
1422
5
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1423
374
1424
374
        SDValue ShiftAmt =
1425
374
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1426
374
        return TLO.CombineTo(Op,
1427
374
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1428
374
      }
1429
85.4k
    }
1430
85.4k
1431
85.4k
    // If none of the extended bits are demanded, eliminate the sextinreg.
1432
85.4k
    if (DemandedBits.getActiveBits() <= ExVTBits)
1433
400
      return TLO.CombineTo(Op, Op0);
1434
85.0k
1435
85.0k
    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1436
85.0k
1437
85.0k
    // Since the sign extended bits are demanded, we know that the sign
1438
85.0k
    // bit is demanded.
1439
85.0k
    InputDemandedBits.setBit(ExVTBits - 1);
1440
85.0k
1441
85.0k
    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1442
2.58k
      return true;
1443
82.4k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1444
82.4k
1445
82.4k
    // If the sign bit of the input is known set or clear, then we know the
1446
82.4k
    // top bits of the result.
1447
82.4k
1448
82.4k
    // If the input sign bit is known zero, convert this into a zero extension.
1449
82.4k
    if (Known.Zero[ExVTBits - 1])
1450
0
      return TLO.CombineTo(
1451
0
          Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1452
82.4k
1453
82.4k
    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1454
82.4k
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1455
2
      Known.One.setBitsFrom(ExVTBits);
1456
2
      Known.Zero &= Mask;
1457
82.4k
    } else { // Input sign bit unknown
1458
82.4k
      Known.Zero &= Mask;
1459
82.4k
      Known.One &= Mask;
1460
82.4k
    }
1461
82.4k
    break;
1462
82.4k
  }
1463
82.4k
  case ISD::BUILD_PAIR: {
1464
27.0k
    EVT HalfVT = Op.getOperand(0).getValueType();
1465
27.0k
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1466
27.0k
1467
27.0k
    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1468
27.0k
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1469
27.0k
1470
27.0k
    KnownBits KnownLo, KnownHi;
1471
27.0k
1472
27.0k
    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1473
322
      return true;
1474
26.6k
1475
26.6k
    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1476
2.65k
      return true;
1477
24.0k
1478
24.0k
    Known.Zero = KnownLo.Zero.zext(BitWidth) |
1479
24.0k
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1480
24.0k
1481
24.0k
    Known.One = KnownLo.One.zext(BitWidth) |
1482
24.0k
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1483
24.0k
    break;
1484
24.0k
  }
1485
121k
  case ISD::ZERO_EXTEND:
1486
121k
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
1487
121k
    SDValue Src = Op.getOperand(0);
1488
121k
    EVT SrcVT = Src.getValueType();
1489
121k
    unsigned InBits = SrcVT.getScalarSizeInBits();
1490
121k
    unsigned InElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements()14.7k
:
1106k
;
1491
121k
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1492
121k
1493
121k
    // If none of the top bits are demanded, convert this into an any_extend.
1494
121k
    if (DemandedBits.getActiveBits() <= InBits) {
1495
9.84k
      // If we only need the non-extended bits of the bottom element
1496
9.84k
      // then we can just bitcast to the result.
1497
9.84k
      if (IsVecInReg && 
DemandedElts == 12.00k
&&
1498
9.84k
          
VT.getSizeInBits() == SrcVT.getSizeInBits()18
&&
1499
9.84k
          
TLO.DAG.getDataLayout().isLittleEndian()18
)
1500
18
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1501
9.82k
1502
9.82k
      unsigned Opc =
1503
9.82k
          IsVecInReg ? 
ISD::ANY_EXTEND_VECTOR_INREG1.98k
:
ISD::ANY_EXTEND7.84k
;
1504
9.82k
      if (!TLO.LegalOperations() || 
isOperationLegal(Opc, VT)3.64k
)
1505
7.83k
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1506
113k
    }
1507
113k
1508
113k
    APInt InDemandedBits = DemandedBits.trunc(InBits);
1509
113k
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1510
113k
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1511
113k
                             Depth + 1))
1512
940
      return true;
1513
112k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1514
112k
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
1515
112k
    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1516
112k
    break;
1517
112k
  }
1518
112k
  case ISD::SIGN_EXTEND:
1519
52.0k
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
1520
52.0k
    SDValue Src = Op.getOperand(0);
1521
52.0k
    EVT SrcVT = Src.getValueType();
1522
52.0k
    unsigned InBits = SrcVT.getScalarSizeInBits();
1523
52.0k
    unsigned InElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements()9.74k
:
142.3k
;
1524
52.0k
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1525
52.0k
1526
52.0k
    // If none of the top bits are demanded, convert this into an any_extend.
1527
52.0k
    if (DemandedBits.getActiveBits() <= InBits) {
1528
1.39k
      // If we only need the non-extended bits of the bottom element
1529
1.39k
      // then we can just bitcast to the result.
1530
1.39k
      if (IsVecInReg && 
DemandedElts == 130
&&
1531
1.39k
          
VT.getSizeInBits() == SrcVT.getSizeInBits()2
&&
1532
1.39k
          
TLO.DAG.getDataLayout().isLittleEndian()0
)
1533
0
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1534
1.39k
1535
1.39k
      unsigned Opc =
1536
1.39k
          IsVecInReg ? 
ISD::ANY_EXTEND_VECTOR_INREG30
:
ISD::ANY_EXTEND1.36k
;
1537
1.39k
      if (!TLO.LegalOperations() || 
isOperationLegal(Opc, VT)924
)
1538
1.00k
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1539
51.0k
    }
1540
51.0k
1541
51.0k
    APInt InDemandedBits = DemandedBits.trunc(InBits);
1542
51.0k
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1543
51.0k
1544
51.0k
    // Since some of the sign extended bits are demanded, we know that the sign
1545
51.0k
    // bit is demanded.
1546
51.0k
    InDemandedBits.setBit(InBits - 1);
1547
51.0k
1548
51.0k
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1549
51.0k
                             Depth + 1))
1550
123
      return true;
1551
50.9k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1552
50.9k
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
1553
50.9k
1554
50.9k
    // If the sign bit is known one, the top bits match.
1555
50.9k
    Known = Known.sext(BitWidth);
1556
50.9k
1557
50.9k
    // If the sign bit is known zero, convert this to a zero extend.
1558
50.9k
    if (Known.isNonNegative()) {
1559
2.80k
      unsigned Opc =
1560
2.80k
          IsVecInReg ? 
ISD::ZERO_EXTEND_VECTOR_INREG0
: ISD::ZERO_EXTEND;
1561
2.80k
      if (!TLO.LegalOperations() || 
isOperationLegal(Opc, VT)8
)
1562
2.80k
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1563
48.1k
    }
1564
48.1k
    break;
1565
48.1k
  }
1566
90.9k
  case ISD::ANY_EXTEND:
1567
90.9k
  case ISD::ANY_EXTEND_VECTOR_INREG: {
1568
90.9k
    SDValue Src = Op.getOperand(0);
1569
90.9k
    EVT SrcVT = Src.getValueType();
1570
90.9k
    unsigned InBits = SrcVT.getScalarSizeInBits();
1571
90.9k
    unsigned InElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements()3.96k
:
187.0k
;
1572
90.9k
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1573
90.9k
1574
90.9k
    // If we only need the bottom element then we can just bitcast.
1575
90.9k
    // TODO: Handle ANY_EXTEND?
1576
90.9k
    if (IsVecInReg && 
DemandedElts == 1647
&&
1577
90.9k
        
VT.getSizeInBits() == SrcVT.getSizeInBits()2
&&
1578
90.9k
        
TLO.DAG.getDataLayout().isLittleEndian()0
)
1579
0
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1580
90.9k
1581
90.9k
    APInt InDemandedBits = DemandedBits.trunc(InBits);
1582
90.9k
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1583
90.9k
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1584
90.9k
                             Depth + 1))
1585
1.04k
      return true;
1586
89.9k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1587
89.9k
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
1588
89.9k
    Known = Known.zext(BitWidth, false /* => any extend */);
1589
89.9k
    break;
1590
89.9k
  }
1591
529k
  case ISD::TRUNCATE: {
1592
529k
    SDValue Src = Op.getOperand(0);
1593
529k
1594
529k
    // Simplify the input, using demanded bit information, and compute the known
1595
529k
    // zero/one bits live out.
1596
529k
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1597
529k
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1598
529k
    if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1599
19.9k
      return true;
1600
509k
    Known = Known.trunc(BitWidth);
1601
509k
1602
509k
    // If the input is only used by this truncate, see if we can shrink it based
1603
509k
    // on the known demanded bits.
1604
509k
    if (Src.getNode()->hasOneUse()) {
1605
337k
      switch (Src.getOpcode()) {
1606
337k
      default:
1607
262k
        break;
1608
337k
      case ISD::SRL:
1609
75.1k
        // Shrink SRL by a constant if none of the high bits shifted in are
1610
75.1k
        // demanded.
1611
75.1k
        if (TLO.LegalTypes() && 
!isTypeDesirableForOp(ISD::SRL, VT)61.6k
)
1612
12.0k
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1613
12.0k
          // undesirable.
1614
12.0k
          break;
1615
63.1k
1616
63.1k
        auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1617
63.1k
        if (!ShAmt || 
ShAmt->getAPIntValue().uge(BitWidth)62.0k
)
1618
56.9k
          break;
1619
6.21k
1620
6.21k
        SDValue Shift = Src.getOperand(1);
1621
6.21k
        uint64_t ShVal = ShAmt->getZExtValue();
1622
6.21k
1623
6.21k
        if (TLO.LegalTypes())
1624
3.73k
          Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1625
6.21k
1626
6.21k
        APInt HighBits =
1627
6.21k
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1628
6.21k
        HighBits.lshrInPlace(ShVal);
1629
6.21k
        HighBits = HighBits.trunc(BitWidth);
1630
6.21k
1631
6.21k
        if (!(HighBits & DemandedBits)) {
1632
1.90k
          // None of the shifted in bits are needed.  Add a truncate of the
1633
1.90k
          // shift input, then shift it.
1634
1.90k
          SDValue NewTrunc =
1635
1.90k
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1636
1.90k
          return TLO.CombineTo(
1637
1.90k
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
1638
1.90k
        }
1639
4.30k
        break;
1640
337k
      }
1641
337k
    }
1642
507k
1643
507k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1644
507k
    break;
1645
507k
  }
1646
507k
  case ISD::AssertZext: {
1647
201k
    // AssertZext demands all of the high bits, plus any of the low bits
1648
201k
    // demanded by its users.
1649
201k
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1650
201k
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1651
201k
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1652
201k
                             TLO, Depth + 1))
1653
0
      return true;
1654
201k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1655
201k
1656
201k
    Known.Zero |= ~InMask;
1657
201k
    break;
1658
201k
  }
1659
201k
  case ISD::EXTRACT_VECTOR_ELT: {
1660
128k
    SDValue Src = Op.getOperand(0);
1661
128k
    SDValue Idx = Op.getOperand(1);
1662
128k
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1663
128k
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1664
128k
1665
128k
    // Demand the bits from every vector element without a constant index.
1666
128k
    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1667
128k
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1668
128k
      if (CIdx->getAPIntValue().ult(NumSrcElts))
1669
128k
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
1670
128k
1671
128k
    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1672
128k
    // anything about the extended bits.
1673
128k
    APInt DemandedSrcBits = DemandedBits;
1674
128k
    if (BitWidth > EltBitWidth)
1675
12.8k
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1676
128k
1677
128k
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1678
128k
                             Depth + 1))
1679
1.41k
      return true;
1680
127k
1681
127k
    Known = Known2;
1682
127k
    if (BitWidth > EltBitWidth)
1683
12.8k
      Known = Known.zext(BitWidth, false /* => any extend */);
1684
127k
    break;
1685
127k
  }
1686
220k
  case ISD::BITCAST: {
1687
220k
    SDValue Src = Op.getOperand(0);
1688
220k
    EVT SrcVT = Src.getValueType();
1689
220k
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1690
220k
1691
220k
    // If this is an FP->Int bitcast and if the sign bit is the only
1692
220k
    // thing demanded, turn this into a FGETSIGN.
1693
220k
    if (!TLO.LegalOperations() && 
!VT.isVector()23.2k
&&
!SrcVT.isVector()5.22k
&&
1694
220k
        
DemandedBits == APInt::getSignMask(Op.getValueSizeInBits())1.28k
&&
1695
220k
        
SrcVT.isFloatingPoint()38
) {
1696
38
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1697
38
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1698
38
      if ((OpVTLegal || 
i32Legal32
) &&
VT.isSimple()10
&&
SrcVT != MVT::f168
&&
1699
38
          
SrcVT != MVT::f1286
) {
1700
6
        // Cannot eliminate/lower SHL for f128 yet.
1701
6
        EVT Ty = OpVTLegal ? VT : 
MVT::i320
;
1702
6
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1703
6
        // place.  We expect the SHL to be eliminated by other optimizations.
1704
6
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1705
6
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1706
6
        if (!OpVTLegal && 
OpVTSizeInBits > 320
)
1707
0
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1708
6
        unsigned ShVal = Op.getValueSizeInBits() - 1;
1709
6
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1710
6
        return TLO.CombineTo(Op,
1711
6
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1712
6
      }
1713
220k
    }
1714
220k
1715
220k
    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
1716
220k
    // Demand the elt/bit if any of the original elts/bits are demanded.
1717
220k
    // TODO - bigendian once we have test coverage.
1718
220k
    // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
1719
220k
    if (SrcVT.isVector() && 
NumSrcEltBits > 1207k
&&
1720
220k
        
(BitWidth % NumSrcEltBits) == 0202k
&&
1721
220k
        
TLO.DAG.getDataLayout().isLittleEndian()139k
) {
1722
138k
      unsigned Scale = BitWidth / NumSrcEltBits;
1723
138k
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
1724
138k
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1725
138k
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1726
431k
      for (unsigned i = 0; i != Scale; 
++i293k
) {
1727
293k
        unsigned Offset = i * NumSrcEltBits;
1728
293k
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1729
293k
        if (!Sub.isNullValue()) {
1730
213k
          DemandedSrcBits |= Sub;
1731
1.12M
          for (unsigned j = 0; j != NumElts; 
++j908k
)
1732
908k
            if (DemandedElts[j])
1733
855k
              DemandedSrcElts.setBit((j * Scale) + i);
1734
213k
        }
1735
293k
      }
1736
138k
1737
138k
      APInt KnownSrcUndef, KnownSrcZero;
1738
138k
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1739
138k
                                     KnownSrcZero, TLO, Depth + 1))
1740
4.45k
        return true;
1741
134k
1742
134k
      KnownBits KnownSrcBits;
1743
134k
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1744
134k
                               KnownSrcBits, TLO, Depth + 1))
1745
1.30k
        return true;
1746
82.0k
    } else if ((NumSrcEltBits % BitWidth) == 0 &&
1747
82.0k
               
TLO.DAG.getDataLayout().isLittleEndian()75.5k
) {
1748
74.8k
      unsigned Scale = NumSrcEltBits / BitWidth;
1749
74.8k
      unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements()62.1k
:
112.7k
;
1750
74.8k
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1751
74.8k
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1752
1.01M
      for (unsigned i = 0; i != NumElts; 
++i935k
)
1753
935k
        if (DemandedElts[i]) {
1754
586k
          unsigned Offset = (i % Scale) * BitWidth;
1755
586k
          DemandedSrcBits.insertBits(DemandedBits, Offset);
1756
586k
          DemandedSrcElts.setBit(i / Scale);
1757
586k
        }
1758
74.8k
1759
74.8k
      if (SrcVT.isVector()) {
1760
62.1k
        APInt KnownSrcUndef, KnownSrcZero;
1761
62.1k
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1762
62.1k
                                       KnownSrcZero, TLO, Depth + 1))
1763
1.10k
          return true;
1764
73.7k
      }
1765
73.7k
1766
73.7k
      KnownBits KnownSrcBits;
1767
73.7k
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1768
73.7k
                               KnownSrcBits, TLO, Depth + 1))
1769
239
        return true;
1770
213k
    }
1771
213k
1772
213k
    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
1773
213k
    // recursive call where Known may be useful to the caller.
1774
213k
    if (Depth > 0) {
1775
211k
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1776
211k
      return false;
1777
211k
    }
1778
2.62k
    break;
1779
2.62k
  }
1780
4.09M
  case ISD::ADD:
1781
4.09M
  case ISD::MUL:
1782
4.09M
  case ISD::SUB: {
1783
4.09M
    // Add, Sub, and Mul don't demand any bits in positions beyond that
1784
4.09M
    // of the highest bit demanded of them.
1785
4.09M
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1786
4.09M
    SDNodeFlags Flags = Op.getNode()->getFlags();
1787
4.09M
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1788
4.09M
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1789
4.09M
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1790
4.09M
                             Depth + 1) ||
1791
4.09M
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1792
4.08M
                             Depth + 1) ||
1793
4.09M
        // See if the operation should be performed at a smaller bit width.
1794
4.09M
        
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)4.07M
) {
1795
24.5k
      if (Flags.hasNoSignedWrap() || 
Flags.hasNoUnsignedWrap()22.4k
) {
1796
2.35k
        // Disable the nsw and nuw flags. We can no longer guarantee that we
1797
2.35k
        // won't wrap after simplification.
1798
2.35k
        Flags.setNoSignedWrap(false);
1799
2.35k
        Flags.setNoUnsignedWrap(false);
1800
2.35k
        SDValue NewOp =
1801
2.35k
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1802
2.35k
        return TLO.CombineTo(Op, NewOp);
1803
2.35k
      }
1804
22.1k
      return true;
1805
22.1k
    }
1806
4.07M
1807
4.07M
    // Attempt to avoid multi-use ops if we don't need anything from them.
1808
4.07M
    if (!LoMask.isAllOnesValue() || 
!DemandedElts.isAllOnesValue()3.98M
) {
1809
92.1k
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1810
92.1k
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1811
92.1k
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1812
92.1k
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1813
92.1k
      if (DemandedOp0 || 
DemandedOp192.0k
) {
1814
143
        Flags.setNoSignedWrap(false);
1815
143
        Flags.setNoUnsignedWrap(false);
1816
143
        Op0 = DemandedOp0 ? 
DemandedOp087
:
Op056
;
1817
143
        Op1 = DemandedOp1 ? 
DemandedOp189
:
Op154
;
1818
143
        SDValue NewOp =
1819
143
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1820
143
        return TLO.CombineTo(Op, NewOp);
1821
143
      }
1822
4.07M
    }
1823
4.07M
1824
4.07M
    // If we have a constant operand, we may be able to turn it into -1 if we
1825
4.07M
    // do not demand the high bits. This can make the constant smaller to
1826
4.07M
    // encode, allow more general folding, or match specialized instruction
1827
4.07M
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1828
4.07M
    // is probably not useful (and could be detrimental).
1829
4.07M
    ConstantSDNode *C = isConstOrConstSplat(Op1);
1830
4.07M
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1831
4.07M
    if (C && 
!C->isAllOnesValue()3.02M
&&
!C->isOne()2.94M
&&
1832
4.07M
        
(C->getAPIntValue() | HighMask).isAllOnesValue()2.78M
) {
1833
63
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1834
63
      // We can't guarantee that the new math op doesn't wrap, so explicitly
1835
63
      // clear those flags to prevent folding with a potential existing node
1836
63
      // that has those flags set.
1837
63
      SDNodeFlags Flags;
1838
63
      Flags.setNoSignedWrap(false);
1839
63
      Flags.setNoUnsignedWrap(false);
1840
63
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1841
63
      return TLO.CombineTo(Op, NewOp);
1842
63
    }
1843
4.07M
1844
4.07M
    LLVM_FALLTHROUGH;
1845
4.07M
  }
1846
7.10M
  default:
1847
7.10M
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
1848
495k
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
1849
495k
                                            Known, TLO, Depth))
1850
3.81k
        return true;
1851
491k
      break;
1852
491k
    }
1853
6.61M
1854
6.61M
    // Just use computeKnownBits to compute output bits.
1855
6.61M
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1856
6.61M
    break;
1857
11.6M
  }
1858
11.6M
1859
11.6M
  // If we know the value of all of the demanded bits, return this as a
1860
11.6M
  // constant.
1861
11.6M
  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
1862
14.3k
    // Avoid folding to a constant if any OpaqueConstant is involved.
1863
14.3k
    const SDNode *N = Op.getNode();
1864
14.3k
    for (SDNodeIterator I = SDNodeIterator::begin(N),
1865
14.3k
                        E = SDNodeIterator::end(N);
1866
36.2k
         I != E; 
++I21.8k
) {
1867
24.2k
      SDNode *Op = *I;
1868
24.2k
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
1869
14.7k
        if (C->isOpaque())
1870
2.38k
          return false;
1871
24.2k
    }
1872
14.3k
    // TODO: Handle float bits as well.
1873
14.3k
    
if (11.9k
VT.isInteger()11.9k
)
1874
11.8k
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
1875
11.6M
  }
1876
11.6M
1877
11.6M
  return false;
1878
11.6M
}
1879
1880
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
1881
                                                const APInt &DemandedElts,
1882
                                                APInt &KnownUndef,
1883
                                                APInt &KnownZero,
1884
113k
                                                DAGCombinerInfo &DCI) const {
1885
113k
  SelectionDAG &DAG = DCI.DAG;
1886
113k
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
1887
113k
                        !DCI.isBeforeLegalizeOps());
1888
113k
1889
113k
  bool Simplified =
1890
113k
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
1891
113k
  if (Simplified) {
1892
566
    DCI.AddToWorklist(Op.getNode());
1893
566
    DCI.CommitTargetLoweringOpt(TLO);
1894
566
  }
1895
113k
1896
113k
  return Simplified;
1897
113k
}
1898
1899
/// Given a vector binary operation and known undefined elements for each input
1900
/// operand, compute whether each element of the output is undefined.
1901
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
1902
                                         const APInt &UndefOp0,
1903
222k
                                         const APInt &UndefOp1) {
1904
222k
  EVT VT = BO.getValueType();
1905
222k
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
1906
222k
         "Vector binop only");
1907
222k
1908
222k
  EVT EltVT = VT.getVectorElementType();
1909
222k
  unsigned NumElts = VT.getVectorNumElements();
1910
222k
  assert(UndefOp0.getBitWidth() == NumElts &&
1911
222k
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
1912
222k
1913
222k
  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
1914
6.87M
                                   const APInt &UndefVals) {
1915
6.87M
    if (UndefVals[Index])
1916
1.00M
      return DAG.getUNDEF(EltVT);
1917
5.86M
1918
5.86M
    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
1919
296k
      // Try hard to make sure that the getNode() call is not creating temporary
1920
296k
      // nodes. Ignore opaque integers because they do not constant fold.
1921
296k
      SDValue Elt = BV->getOperand(Index);
1922
296k
      auto *C = dyn_cast<ConstantSDNode>(Elt);
1923
296k
      if (isa<ConstantFPSDNode>(Elt) || 
Elt.isUndef()283k
||
(283k
C283k
&&
!C->isOpaque()263k
))
1924
275k
        return Elt;
1925
5.59M
    }
1926
5.59M
1927
5.59M
    return SDValue();
1928
5.59M
  };
1929
222k
1930
222k
  APInt KnownUndef = APInt::getNullValue(NumElts);
1931
3.65M
  for (unsigned i = 0; i != NumElts; 
++i3.43M
) {
1932
3.43M
    // If both inputs for this element are either constant or undef and match
1933
3.43M
    // the element type, compute the constant/undef result for this element of
1934
3.43M
    // the vector.
1935
3.43M
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
1936
3.43M
    // not handle FP constants. The code within getNode() should be refactored
1937
3.43M
    // to avoid the danger of creating a bogus temporary node here.
1938
3.43M
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
1939
3.43M
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
1940
3.43M
    if (C0 && 
C171.9k
&&
C0.getValueType() == EltVT4.66k
&&
C1.getValueType() == EltVT4.66k
)
1941
4.66k
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
1942
3.42k
        KnownUndef.setBit(i);
1943
3.43M
  }
1944
222k
  return KnownUndef;
1945
222k
}
1946
1947
bool TargetLowering::SimplifyDemandedVectorElts(
1948
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
1949
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
1950
4.41M
    bool AssumeSingleUse) const {
1951
4.41M
  EVT VT = Op.getValueType();
1952
4.41M
  APInt DemandedElts = OriginalDemandedElts;
1953
4.41M
  unsigned NumElts = DemandedElts.getBitWidth();
1954
4.41M
  assert(VT.isVector() && "Expected vector op");
1955
4.41M
  assert(VT.getVectorNumElements() == NumElts &&
1956
4.41M
         "Mask size mismatches value type element count!");
1957
4.41M
1958
4.41M
  KnownUndef = KnownZero = APInt::getNullValue(NumElts);
1959
4.41M
1960
4.41M
  // Undef operand.
1961
4.41M
  if (Op.isUndef()) {
1962
358k
    KnownUndef.setAllBits();
1963
358k
    return false;
1964
358k
  }
1965
4.05M
1966
4.05M
  // If Op has other users, assume that all elements are needed.
1967
4.05M
  if (!Op.getNode()->hasOneUse() && 
!AssumeSingleUse1.54M
)
1968
1.18M
    DemandedElts.setAllBits();
1969
4.05M
1970
4.05M
  // Not demanding any elements from Op.
1971
4.05M
  if (DemandedElts == 0) {
1972
3.23k
    KnownUndef.setAllBits();
1973
3.23k
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1974
3.23k
  }
1975
4.05M
1976
4.05M
  // Limit search depth.
1977
4.05M
  if (Depth >= 6)
1978
360k
    return false;
1979
3.69M
1980
3.69M
  SDLoc DL(Op);
1981
3.69M
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
1982
3.69M
1983
3.69M
  switch (Op.getOpcode()) {
1984
3.69M
  case ISD::SCALAR_TO_VECTOR: {
1985
74.4k
    if (!DemandedElts[0]) {
1986
0
      KnownUndef.setAllBits();
1987
0
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1988
0
    }
1989
74.4k
    KnownUndef.setHighBits(NumElts - 1);
1990
74.4k
    break;
1991
74.4k
  }
1992
527k
  case ISD::BITCAST: {
1993
527k
    SDValue Src = Op.getOperand(0);
1994
527k
    EVT SrcVT = Src.getValueType();
1995
527k
1996
527k
    // We only handle vectors here.
1997
527k
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
1998
527k
    if (!SrcVT.isVector())
1999
44.3k
      break;
2000
483k
2001
483k
    // Fast handling of 'identity' bitcasts.
2002
483k
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
2003
483k
    if (NumSrcElts == NumElts)
2004
35.9k
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2005
35.9k
                                        KnownZero, TLO, Depth + 1);
2006
447k
2007
447k
    APInt SrcZero, SrcUndef;
2008
447k
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2009
447k
2010
447k
    // Bitcast from 'large element' src vector to 'small element' vector, we
2011
447k
    // must demand a source element if any DemandedElt maps to it.
2012
447k
    if ((NumElts % NumSrcElts) == 0) {
2013
262k
      unsigned Scale = NumElts / NumSrcElts;
2014
3.19M
      for (unsigned i = 0; i != NumElts; 
++i2.93M
)
2015
2.93M
        if (DemandedElts[i])
2016
1.66M
          SrcDemandedElts.setBit(i / Scale);
2017
262k
2018
262k
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2019
262k
                                     TLO, Depth + 1))
2020
1.07k
        return true;
2021
260k
2022
260k
      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2023
260k
      // of the large element.
2024
260k
      // TODO - bigendian once we have test coverage.
2025
260k
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
2026
257k
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2027
257k
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2028
3.14M
        for (unsigned i = 0; i != NumElts; 
++i2.88M
)
2029
2.88M
          if (DemandedElts[i]) {
2030
1.64M
            unsigned Ofs = (i % Scale) * EltSizeInBits;
2031
1.64M
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2032
1.64M
          }
2033
257k
2034
257k
        KnownBits Known;
2035
257k
        if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2036
717
          return true;
2037
260k
      }
2038
260k
2039
260k
      // If the src element is zero/undef then all the output elements will be -
2040
260k
      // only demanded elements are guaranteed to be correct.
2041
1.38M
      
for (unsigned i = 0; 260k
i != NumSrcElts;
++i1.12M
) {
2042
1.12M
        if (SrcDemandedElts[i]) {
2043
854k
          if (SrcZero[i])
2044
32.1k
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
2045
854k
          if (SrcUndef[i])
2046
23.9k
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2047
854k
        }
2048
1.12M
      }
2049
260k
    }
2050
447k
2051
447k
    // Bitcast from 'small element' src vector to 'large element' vector, we
2052
447k
    // demand all smaller source elements covered by the larger demanded element
2053
447k
    // of this vector.
2054
447k
    
if (445k
(NumSrcElts % NumElts) == 0445k
) {
2055
185k
      unsigned Scale = NumSrcElts / NumElts;
2056
1.08M
      for (unsigned i = 0; i != NumElts; 
++i899k
)
2057
899k
        if (DemandedElts[i])
2058
715k
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2059
185k
2060
185k
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2061
185k
                                     TLO, Depth + 1))
2062
790
        return true;
2063
184k
2064
184k
      // If all the src elements covering an output element are zero/undef, then
2065
184k
      // the output element will be as well, assuming it was demanded.
2066
1.08M
      
for (unsigned i = 0; 184k
i != NumElts;
++i896k
) {
2067
896k
        if (DemandedElts[i]) {
2068
714k
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2069
5.87k
            KnownZero.setBit(i);
2070
714k
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2071
8.65k
            KnownUndef.setBit(i);
2072
714k
        }
2073
896k
      }
2074
184k
    }
2075
445k
    
break444k
;
2076
445k
  }
2077
445k
  case ISD::BUILD_VECTOR: {
2078
172k
    // Check all elements and simplify any unused elements with UNDEF.
2079
172k
    if (!DemandedElts.isAllOnesValue()) {
2080
48.9k
      // Don't simplify BROADCASTS.
2081
48.9k
      if (llvm::any_of(Op->op_values(),
2082
290k
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2083
21.7k
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2084
21.7k
        bool Updated = false;
2085
166k
        for (unsigned i = 0; i != NumElts; 
++i144k
) {
2086
144k
          if (!DemandedElts[i] && 
!Ops[i].isUndef()74.4k
) {
2087
9.32k
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2088
9.32k
            KnownUndef.setBit(i);
2089
9.32k
            Updated = true;
2090
9.32k
          }
2091
144k
        }
2092
21.7k
        if (Updated)
2093
5.17k
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2094
167k
      }
2095
48.9k
    }
2096
1.73M
    
for (unsigned i = 0; 167k
i != NumElts;
++i1.56M
) {
2097
1.56M
      SDValue SrcOp = Op.getOperand(i);
2098
1.56M
      if (SrcOp.isUndef()) {
2099
101k
        KnownUndef.setBit(i);
2100
1.46M
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2101
1.46M
                 
(1.14M
isNullConstant(SrcOp)1.14M
||
isNullFPConstant(SrcOp)808k
)) {
2102
345k
        KnownZero.setBit(i);
2103
345k
      }
2104
1.56M
    }
2105
167k
    break;
2106
167k
  }
2107
167k
  case ISD::CONCAT_VECTORS: {
2108
63.1k
    EVT SubVT = Op.getOperand(0).getValueType();
2109
63.1k
    unsigned NumSubVecs = Op.getNumOperands();
2110
63.1k
    unsigned NumSubElts = SubVT.getVectorNumElements();
2111
238k
    for (unsigned i = 0; i != NumSubVecs; 
++i175k
) {
2112
176k
      SDValue SubOp = Op.getOperand(i);
2113
176k
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2114
176k
      APInt SubUndef, SubZero;
2115
176k
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2116
176k
                                     Depth + 1))
2117
1.20k
        return true;
2118
175k
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
2119
175k
      KnownZero.insertBits(SubZero, i * NumSubElts);
2120
175k
    }
2121
63.1k
    
break61.9k
;
2122
63.1k
  }
2123
110k
  case ISD::INSERT_SUBVECTOR: {
2124
110k
    if (!isa<ConstantSDNode>(Op.getOperand(2)))
2125
0
      break;
2126
110k
    SDValue Base = Op.getOperand(0);
2127
110k
    SDValue Sub = Op.getOperand(1);
2128
110k
    EVT SubVT = Sub.getValueType();
2129
110k
    unsigned NumSubElts = SubVT.getVectorNumElements();
2130
110k
    const APInt &Idx = Op.getConstantOperandAPInt(2);
2131
110k
    if (Idx.ugt(NumElts - NumSubElts))
2132
0
      break;
2133
110k
    unsigned SubIdx = Idx.getZExtValue();
2134
110k
    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2135
110k
    APInt SubUndef, SubZero;
2136
110k
    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2137
110k
                                   Depth + 1))
2138
276
      return true;
2139
110k
    APInt BaseElts = DemandedElts;
2140
110k
    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2141
110k
    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2142
110k
                                   Depth + 1))
2143
112
      return true;
2144
110k
    KnownUndef.insertBits(SubUndef, SubIdx);
2145
110k
    KnownZero.insertBits(SubZero, SubIdx);
2146
110k
    break;
2147
110k
  }
2148
402k
  case ISD::EXTRACT_SUBVECTOR: {
2149
402k
    SDValue Src = Op.getOperand(0);
2150
402k
    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2151
402k
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2152
402k
    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2153
402k
      // Offset the demanded elts by the subvector index.
2154
402k
      uint64_t Idx = SubIdx->getZExtValue();
2155
402k
      APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2156
402k
      APInt SrcUndef, SrcZero;
2157
402k
      if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2158
402k
                                     Depth + 1))
2159
1.63k
        return true;
2160
400k
      KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2161
400k
      KnownZero = SrcZero.extractBits(NumElts, Idx);
2162
400k
    }
2163
402k
    
break400k
;
2164
402k
  }
2165
402k
  case ISD::INSERT_VECTOR_ELT: {
2166
24.8k
    SDValue Vec = Op.getOperand(0);
2167
24.8k
    SDValue Scl = Op.getOperand(1);
2168
24.8k
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2169
24.8k
2170
24.8k
    // For a legal, constant insertion index, if we don't need this insertion
2171
24.8k
    // then strip it, else remove it from the demanded elts.
2172
24.8k
    if (CIdx && 
CIdx->getAPIntValue().ult(NumElts)22.9k
) {
2173
22.9k
      unsigned Idx = CIdx->getZExtValue();
2174
22.9k
      if (!DemandedElts[Idx])
2175
109
        return TLO.CombineTo(Op, Vec);
2176
22.7k
2177
22.7k
      APInt DemandedVecElts(DemandedElts);
2178
22.7k
      DemandedVecElts.clearBit(Idx);
2179
22.7k
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2180
22.7k
                                     KnownZero, TLO, Depth + 1))
2181
317
        return true;
2182
22.4k
2183
22.4k
      KnownUndef.clearBit(Idx);
2184
22.4k
      if (Scl.isUndef())
2185
58
        KnownUndef.setBit(Idx);
2186
22.4k
2187
22.4k
      KnownZero.clearBit(Idx);
2188
22.4k
      if (isNullConstant(Scl) || 
isNullFPConstant(Scl)22.0k
)
2189
523
        KnownZero.setBit(Idx);
2190
22.4k
      break;
2191
22.4k
    }
2192
1.97k
2193
1.97k
    APInt VecUndef, VecZero;
2194
1.97k
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2195
1.97k
                                   Depth + 1))
2196
8
      return true;
2197
1.96k
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2198
1.96k
    break;
2199
1.96k
  }
2200
21.5k
  case ISD::VSELECT: {
2201
21.5k
    // Try to transform the select condition based on the current demanded
2202
21.5k
    // elements.
2203
21.5k
    // TODO: If a condition element is undef, we can choose from one arm of the
2204
21.5k
    //       select (and if one arm is undef, then we can propagate that to the
2205
21.5k
    //       result).
2206
21.5k
    // TODO - add support for constant vselect masks (see IR version of this).
2207
21.5k
    APInt UnusedUndef, UnusedZero;
2208
21.5k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2209
21.5k
                                   UnusedZero, TLO, Depth + 1))
2210
12
      return true;
2211
21.4k
2212
21.4k
    // See if we can simplify either vselect operand.
2213
21.4k
    APInt DemandedLHS(DemandedElts);
2214
21.4k
    APInt DemandedRHS(DemandedElts);
2215
21.4k
    APInt UndefLHS, ZeroLHS;
2216
21.4k
    APInt UndefRHS, ZeroRHS;
2217
21.4k
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2218
21.4k
                                   ZeroLHS, TLO, Depth + 1))
2219
64
      return true;
2220
21.4k
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2221
21.4k
                                   ZeroRHS, TLO, Depth + 1))
2222
13
      return true;
2223
21.4k
2224
21.4k
    KnownUndef = UndefLHS & UndefRHS;
2225
21.4k
    KnownZero = ZeroLHS & ZeroRHS;
2226
21.4k
    break;
2227
21.4k
  }
2228
286k
  case ISD::VECTOR_SHUFFLE: {
2229
286k
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2230
286k
2231
286k
    // Collect demanded elements from shuffle operands..
2232
286k
    APInt DemandedLHS(NumElts, 0);
2233
286k
    APInt DemandedRHS(NumElts, 0);
2234
5.31M
    for (unsigned i = 0; i != NumElts; 
++i5.03M
) {
2235
5.03M
      int M = ShuffleMask[i];
2236
5.03M
      if (M < 0 || 
!DemandedElts[i]2.37M
)
2237
2.69M
        continue;
2238
2.33M
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2239
2.33M
      if (M < (int)NumElts)
2240
1.98M
        DemandedLHS.setBit(M);
2241
349k
      else
2242
349k
        DemandedRHS.setBit(M - NumElts);
2243
2.33M
    }
2244
286k
2245
286k
    // See if we can simplify either shuffle operand.
2246
286k
    APInt UndefLHS, ZeroLHS;
2247
286k
    APInt UndefRHS, ZeroRHS;
2248
286k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2249
286k
                                   ZeroLHS, TLO, Depth + 1))
2250
2.57k
      return true;
2251
284k
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2252
284k
                                   ZeroRHS, TLO, Depth + 1))
2253
1.94k
      return true;
2254
282k
2255
282k
    // Simplify mask using undef elements from LHS/RHS.
2256
282k
    bool Updated = false;
2257
282k
    bool IdentityLHS = true, IdentityRHS = true;
2258
282k
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2259
5.26M
    for (unsigned i = 0; i != NumElts; 
++i4.97M
) {
2260
4.97M
      int &M = NewMask[i];
2261
4.97M
      if (M < 0)
2262
2.64M
        continue;
2263
2.32M
      if (!DemandedElts[i] || 
(2.29M
M < (int)NumElts2.29M
&&
UndefLHS[M]1.96M
) ||
2264
2.32M
          
(2.28M
M >= (int)NumElts2.28M
&&
UndefRHS[M - NumElts]333k
)) {
2265
46.5k
        Updated = true;
2266
46.5k
        M = -1;
2267
46.5k
      }
2268
2.32M
      IdentityLHS &= (M < 0) || 
(M == (int)i)2.28M
;
2269
2.32M
      IdentityRHS &= (M < 0) || 
((M - NumElts) == i)2.28M
;
2270
2.32M
    }
2271
282k
2272
282k
    // Update legal shuffle masks based on demanded elements if it won't reduce
2273
282k
    // to Identity which can cause premature removal of the shuffle mask.
2274
282k
    if (Updated && 
!IdentityLHS9.50k
&&
!IdentityRHS6.88k
&&
!TLO.LegalOps6.78k
&&
2275
282k
        
isShuffleMaskLegal(NewMask, VT)1.59k
)
2276
1.38k
      return TLO.CombineTo(Op,
2277
1.38k
                           TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
2278
1.38k
                                                    Op.getOperand(1), NewMask));
2279
280k
2280
280k
    // Propagate undef/zero elements from LHS/RHS.
2281
5.24M
    
for (unsigned i = 0; 280k
i != NumElts;
++i4.96M
) {
2282
4.96M
      int M = ShuffleMask[i];
2283
4.96M
      if (M < 0) {
2284
2.64M
        KnownUndef.setBit(i);
2285
2.64M
      } else 
if (2.32M
M < (int)NumElts2.32M
) {
2286
1.98M
        if (UndefLHS[M])
2287
16.8k
          KnownUndef.setBit(i);
2288
1.98M
        if (ZeroLHS[M])
2289
20.0k
          KnownZero.setBit(i);
2290
1.98M
      } else {
2291
334k
        if (UndefRHS[M - NumElts])
2292
816
          KnownUndef.setBit(i);
2293
334k
        if (ZeroRHS[M - NumElts])
2294
106k
          KnownZero.setBit(i);
2295
334k
      }
2296
4.96M
    }
2297
280k
    break;
2298
280k
  }
2299
280k
  case ISD::ANY_EXTEND_VECTOR_INREG:
2300
27.0k
  case ISD::SIGN_EXTEND_VECTOR_INREG:
2301
27.0k
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
2302
27.0k
    APInt SrcUndef, SrcZero;
2303
27.0k
    SDValue Src = Op.getOperand(0);
2304
27.0k
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2305
27.0k
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2306
27.0k
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2307
27.0k
                                   Depth + 1))
2308
182
      return true;
2309
26.8k
    KnownZero = SrcZero.zextOrTrunc(NumElts);
2310
26.8k
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2311
26.8k
2312
26.8k
    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2313
26.8k
        
Op.getValueSizeInBits() == Src.getValueSizeInBits()1.10k
&&
2314
26.8k
        
DemandedSrcElts == 11.10k
&&
TLO.DAG.getDataLayout().isLittleEndian()50
) {
2315
50
      // aext - if we just need the bottom element then we can bitcast.
2316
50
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2317
50
    }
2318
26.7k
2319
26.7k
    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2320
18.8k
      // zext(undef) upper bits are guaranteed to be zero.
2321
18.8k
      if (DemandedElts.isSubsetOf(KnownUndef))
2322
1
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2323
18.8k
      KnownUndef.clearAllBits();
2324
18.8k
    }
2325
26.7k
    
break26.7k
;
2326
26.7k
  }
2327
26.7k
2328
26.7k
  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2329
26.7k
  // MAX, saturated math, etc.
2330
223k
  case ISD::OR:
2331
223k
  case ISD::XOR:
2332
223k
  case ISD::ADD:
2333
223k
  case ISD::SUB:
2334
223k
  case ISD::FADD:
2335
223k
  case ISD::FSUB:
2336
223k
  case ISD::FMUL:
2337
223k
  case ISD::FDIV:
2338
223k
  case ISD::FREM: {
2339
223k
    APInt UndefRHS, ZeroRHS;
2340
223k
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2341
223k
                                   ZeroRHS, TLO, Depth + 1))
2342
539
      return true;
2343
222k
    APInt UndefLHS, ZeroLHS;
2344
222k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2345
222k
                                   ZeroLHS, TLO, Depth + 1))
2346
291
      return true;
2347
222k
2348
222k
    KnownZero = ZeroLHS & ZeroRHS;
2349
222k
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2350
222k
    break;
2351
222k
  }
2352
222k
  case ISD::SHL:
2353
13.4k
  case ISD::SRL:
2354
13.4k
  case ISD::SRA:
2355
13.4k
  case ISD::ROTL:
2356
13.4k
  case ISD::ROTR: {
2357
13.4k
    APInt UndefRHS, ZeroRHS;
2358
13.4k
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2359
13.4k
                                   ZeroRHS, TLO, Depth + 1))
2360
570
      return true;
2361
12.8k
    APInt UndefLHS, ZeroLHS;
2362
12.8k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2363
12.8k
                                   ZeroLHS, TLO, Depth + 1))
2364
114
      return true;
2365
12.7k
2366
12.7k
    KnownZero = ZeroLHS;
2367
12.7k
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2368
12.7k
    break;
2369
12.7k
  }
2370
133k
  case ISD::MUL:
2371
133k
  case ISD::AND: {
2372
133k
    APInt SrcUndef, SrcZero;
2373
133k
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2374
133k
                                   SrcZero, TLO, Depth + 1))
2375
264
      return true;
2376
133k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2377
133k
                                   KnownZero, TLO, Depth + 1))
2378
235
      return true;
2379
133k
2380
133k
    // If either side has a zero element, then the result element is zero, even
2381
133k
    // if the other is an UNDEF.
2382
133k
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2383
133k
    // and then handle 'and' nodes with the rest of the binop opcodes.
2384
133k
    KnownZero |= SrcZero;
2385
133k
    KnownUndef &= SrcUndef;
2386
133k
    KnownUndef &= ~KnownZero;
2387
133k
    break;
2388
133k
  }
2389
133k
  case ISD::TRUNCATE:
2390
59.8k
  case ISD::SIGN_EXTEND:
2391
59.8k
  case ISD::ZERO_EXTEND:
2392
59.8k
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2393
59.8k
                                   KnownZero, TLO, Depth + 1))
2394
177
      return true;
2395
59.7k
2396
59.7k
    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2397
24.2k
      // zext(undef) upper bits are guaranteed to be zero.
2398
24.2k
      if (DemandedElts.isSubsetOf(KnownUndef))
2399
3
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2400
24.2k
      KnownUndef.clearAllBits();
2401
24.2k
    }
2402
59.7k
    
break59.7k
;
2403
1.55M
  default: {
2404
1.55M
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2405
482k
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2406
482k
                                                  KnownZero, TLO, Depth))
2407
5.06k
        return true;
2408
1.06M
    } else {
2409
1.06M
      KnownBits Known;
2410
1.06M
      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2411
1.06M
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2412
1.06M
                               TLO, Depth, AssumeSingleUse))
2413
2
        return true;
2414
1.54M
    }
2415
1.54M
    break;
2416
1.54M
  }
2417
3.63M
  }
2418
3.63M
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2419
3.63M
2420
3.63M
  // Constant fold all undef cases.
2421
3.63M
  // TODO: Handle zero cases as well.
2422
3.63M
  if (DemandedElts.isSubsetOf(KnownUndef))
2423
753
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2424
3.62M
2425
3.62M
  return false;
2426
3.62M
}
2427
2428
/// Determine which of the bits specified in Mask are known to be either zero or
2429
/// one and return them in the Known.
2430
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2431
                                                   KnownBits &Known,
2432
                                                   const APInt &DemandedElts,
2433
                                                   const SelectionDAG &DAG,
2434
281k
                                                   unsigned Depth) const {
2435
281k
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2436
281k
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2437
281k
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2438
281k
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2439
281k
         "Should use MaskedValueIsZero if you don't know whether Op"
2440
281k
         " is a target node!");
2441
281k
  Known.resetAll();
2442
281k
}
2443
2444
void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2445
                                                   KnownBits &Known,
2446
                                                   const APInt &DemandedElts,
2447
                                                   const SelectionDAG &DAG,
2448
2.70M
                                                   unsigned Depth) const {
2449
2.70M
  assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2450
2.70M
2451
2.70M
  if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2452
2.70M
    // The low bits are known zero if the pointer is aligned.
2453
2.70M
    Known.Zero.setLowBits(Log2_32(Align));
2454
2.70M
  }
2455
2.70M
}
2456
2457
/// This method can be implemented by targets that want to expose additional
2458
/// information about sign bits to the DAG Combiner.
2459
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2460
                                                         const APInt &,
2461
                                                         const SelectionDAG &,
2462
52.7k
                                                         unsigned Depth) const {
2463
52.7k
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2464
52.7k
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2465
52.7k
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2466
52.7k
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2467
52.7k
         "Should use ComputeNumSignBits if you don't know whether Op"
2468
52.7k
         " is a target node!");
2469
52.7k
  return 1;
2470
52.7k
}
2471
2472
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2473
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2474
46.8k
    TargetLoweringOpt &TLO, unsigned Depth) const {
2475
46.8k
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2476
46.8k
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2477
46.8k
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2478
46.8k
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2479
46.8k
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2480
46.8k
         " is a target node!");
2481
46.8k
  return false;
2482
46.8k
}
2483
2484
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2485
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2486
429k
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2487
429k
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2488
429k
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2489
429k
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2490
429k
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2491
429k
         "Should use SimplifyDemandedBits if you don't know whether Op"
2492
429k
         " is a target node!");
2493
429k
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2494
429k
  return false;
2495
429k
}
2496
2497
/// Target hook for multiple-use demanded-bits simplification of target nodes.
///
/// This base implementation checks (in asserts builds) that \p Op is a target
/// node or an intrinsic and returns an empty SDValue; all other parameters are
/// unused here.
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  const unsigned Opc = Op.getOpcode();
  (void)Opc; // Only read by the assertion below.
  assert(
      (Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN ||
       Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  return SDValue();
}
2509
2510
3.71M
/// Base implementation of the load-to-constant hook: it ignores the load node
/// it is given and always returns nullptr.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
  return nullptr;
}
2513
2514
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2515
                                                  const SelectionDAG &DAG,
2516
                                                  bool SNaN,
2517
0
                                                  unsigned Depth) const {
2518
0
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2519
0
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2520
0
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2521
0
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2522
0
         "Should use isKnownNeverNaN if you don't know whether Op"
2523
0
         " is a target node!");
2524
0
  return false;
2525
0
}
2526
2527
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2528
// work with truncating build vectors and vectors with elements of less than
2529
// 8 bits.
2530
352k
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2531
352k
  if (!N)
2532
0
    return false;
2533
352k
2534
352k
  APInt CVal;
2535
352k
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2536
285k
    CVal = CN->getAPIntValue();
2537
285k
  } else 
if (auto *66.3k
BV66.3k
= dyn_cast<BuildVectorSDNode>(N)) {
2538
21.2k
    auto *CN = BV->getConstantSplatNode();
2539
21.2k
    if (!CN)
2540
362
      return false;
2541
20.8k
2542
20.8k
    // If this is a truncating build vector, truncate the splat value.
2543
20.8k
    // Otherwise, we may fail to match the expected values below.
2544
20.8k
    unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2545
20.8k
    CVal = CN->getAPIntValue();
2546
20.8k
    if (BVEltWidth < CVal.getBitWidth())
2547
1.72k
      CVal = CVal.trunc(BVEltWidth);
2548
45.1k
  } else {
2549
45.1k
    return false;
2550
45.1k
  }
2551
306k
2552
306k
  switch (getBooleanContents(N->getValueType(0))) {
2553
306k
  case UndefinedBooleanContent:
2554
575
    return CVal[0];
2555
306k
  case ZeroOrOneBooleanContent:
2556
281k
    return CVal.isOneValue();
2557
306k
  case ZeroOrNegativeOneBooleanContent:
2558
24.8k
    return CVal.isAllOnesValue();
2559
0
  }
2560
0
2561
0
  llvm_unreachable("Invalid boolean contents");
2562
0
}
2563
2564
497
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2565
497
  if (!N)
2566
0
    return false;
2567
497
2568
497
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2569
497
  if (!CN) {
2570
2
    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2571
2
    if (!BV)
2572
2
      return false;
2573
0
2574
0
    // Only interested in constant splats, we don't care about undef
2575
0
    // elements in identifying boolean constants and getConstantSplatNode
2576
0
    // returns NULL if all ops are undef;
2577
0
    CN = BV->getConstantSplatNode();
2578
0
    if (!CN)
2579
0
      return false;
2580
495
  }
2581
495
2582
495
  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2583
0
    return !CN->getAPIntValue()[0];
2584
495
2585
495
  return CN->isNullValue();
2586
495
}
2587
2588
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2589
9
                                       bool SExt) const {
2590
9
  if (VT == MVT::i1)
2591
0
    return N->isOne();
2592
9
2593
9
  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2594
9
  switch (Cnt) {
2595
9
  case TargetLowering::ZeroOrOneBooleanContent:
2596
1
    // An extended value of 1 is always true, unless its original type is i1,
2597
1
    // in which case it will be sign extended to -1.
2598
1
    return (N->isOne() && !SExt) || 
(0
SExt0
&&
(N->getValueType(0) != MVT::i1)0
);
2599
9
  case TargetLowering::UndefinedBooleanContent:
2600
8
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2601
8
    return N->isAllOnesValue() && 
SExt5
;
2602
0
  }
2603
0
  llvm_unreachable("Unexpected enumeration.");
2604
0
}
2605
2606
/// This helper function of SimplifySetCC tries to optimize the comparison when
2607
/// either operand of the SetCC node is a bitwise-and instruction.
2608
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
2609
                                         ISD::CondCode Cond, const SDLoc &DL,
2610
818k
                                         DAGCombinerInfo &DCI) const {
2611
818k
  // Match these patterns in any of their permutations:
2612
818k
  // (X & Y) == Y
2613
818k
  // (X & Y) != Y
2614
818k
  if (N1.getOpcode() == ISD::AND && 
N0.getOpcode() != ISD::AND1.54k
)
2615
905
    std::swap(N0, N1);
2616
818k
2617
818k
  EVT OpVT = N0.getValueType();
2618
818k
  if (N0.getOpcode() != ISD::AND || 
!OpVT.isInteger()67.3k
||
2619
818k
      
(67.3k
Cond != ISD::SETEQ67.3k
&&
Cond != ISD::SETNE13.1k
))
2620
750k
    return SDValue();
2621
67.3k
2622
67.3k
  SDValue X, Y;
2623
67.3k
  if (N0.getOperand(0) == N1) {
2624
132
    X = N0.getOperand(1);
2625
132
    Y = N0.getOperand(0);
2626
67.1k
  } else if (N0.getOperand(1) == N1) {
2627
770
    X = N0.getOperand(0);
2628
770
    Y = N0.getOperand(1);
2629
66.4k
  } else {
2630
66.4k
    return SDValue();
2631
66.4k
  }
2632
902
2633
902
  SelectionDAG &DAG = DCI.DAG;
2634
902
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
2635
902
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
2636
313
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
2637
313
    // Note that where Y is variable and is known to have at most one bit set
2638
313
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
2639
313
    // equivalent when Y == 0.
2640
313
    Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
2641
313
    if (DCI.isBeforeLegalizeOps() ||
2642
313
        
isCondCodeLegal(Cond, N0.getSimpleValueType())0
)
2643
313
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
2644
589
  } else if (N0.hasOneUse() && 
hasAndNotCompare(Y)362
) {
2645
98
    // If the target supports an 'and-not' or 'and-complement' logic operation,
2646
98
    // try to use that to make a comparison operation more efficient.
2647
98
    // But don't do this transform if the mask is a single bit because there are
2648
98
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
2649
98
    // 'rlwinm' on PPC).
2650
98
2651
98
    // Bail out if the compare operand that we want to turn into a zero is
2652
98
    // already a zero (otherwise, infinite loop).
2653
98
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
2654
98
    if (YConst && 
YConst->isNullValue()45
)
2655
0
      return SDValue();
2656
98
2657
98
    // Transform this into: ~X & Y == 0.
2658
98
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
2659
98
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
2660
98
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
2661
98
  }
2662
491
2663
491
  return SDValue();
2664
491
}
2665
2666
/// There are multiple IR patterns that could be checking whether certain
2667
/// truncation of a signed number would be lossy or not. The pattern which is
2668
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
2669
/// We are looking for the following pattern: (KeptBits is a constant)
2670
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
2671
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
2672
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
2673
/// We will unfold it into the natural trunc+sext pattern:
2674
///   ((%x << C) a>> C) dstcond %x
2675
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
2676
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
2677
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
2678
936k
    const SDLoc &DL) const {
2679
936k
  // We must be comparing with a constant.
2680
936k
  ConstantSDNode *C1;
2681
936k
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
2682
0
    return SDValue();
2683
936k
2684
936k
  // N0 should be:  add %x, (1 << (KeptBits-1))
2685
936k
  if (N0->getOpcode() != ISD::ADD)
2686
863k
    return SDValue();
2687
72.9k
2688
72.9k
  // And we must be 'add'ing a constant.
2689
72.9k
  ConstantSDNode *C01;
2690
72.9k
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
2691
5.68k
    return SDValue();
2692
67.2k
2693
67.2k
  SDValue X = N0->getOperand(0);
2694
67.2k
  EVT XVT = X.getValueType();
2695
67.2k
2696
67.2k
  // Validate constants ...
2697
67.2k
2698
67.2k
  APInt I1 = C1->getAPIntValue();
2699
67.2k
2700
67.2k
  ISD::CondCode NewCond;
2701
67.2k
  if (Cond == ISD::CondCode::SETULT) {
2702
4.11k
    NewCond = ISD::CondCode::SETEQ;
2703
63.1k
  } else if (Cond == ISD::CondCode::SETULE) {
2704
480
    NewCond = ISD::CondCode::SETEQ;
2705
480
    // But need to 'canonicalize' the constant.
2706
480
    I1 += 1;
2707
62.6k
  } else if (Cond == ISD::CondCode::SETUGT) {
2708
3.49k
    NewCond = ISD::CondCode::SETNE;
2709
3.49k
    // But need to 'canonicalize' the constant.
2710
3.49k
    I1 += 1;
2711
59.1k
  } else if (Cond == ISD::CondCode::SETUGE) {
2712
1.53k
    NewCond = ISD::CondCode::SETNE;
2713
1.53k
  } else
2714
57.5k
    return SDValue();
2715
9.61k
2716
9.61k
  APInt I01 = C01->getAPIntValue();
2717
9.61k
2718
17.8k
  auto checkConstants = [&I1, &I01]() -> bool {
2719
17.8k
    // Both of them must be power-of-two, and the constant from setcc is bigger.
2720
17.8k
    return I1.ugt(I01) && 
I1.isPowerOf2()9.59k
&&
I01.isPowerOf2()1.49k
;
2721
17.8k
  };
2722
9.61k
2723
9.61k
  if (checkConstants()) {
2724
1.38k
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
2725
8.23k
  } else {
2726
8.23k
    // What if we invert constants? (and the target predicate)
2727
8.23k
    I1.negate();
2728
8.23k
    I01.negate();
2729
8.23k
    NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
2730
8.23k
    if (!checkConstants())
2731
8.17k
      return SDValue();
2732
1.44k
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
2733
1.44k
  }
2734
1.44k
2735
1.44k
  // They are power-of-two, so which bit is set?
2736
1.44k
  const unsigned KeptBits = I1.logBase2();
2737
1.44k
  const unsigned KeptBitsMinusOne = I01.logBase2();
2738
1.44k
2739
1.44k
  // Magic!
2740
1.44k
  if (KeptBits != (KeptBitsMinusOne + 1))
2741
1.14k
    return SDValue();
2742
293
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
2743
293
2744
293
  // We don't want to do this in every single case.
2745
293
  SelectionDAG &DAG = DCI.DAG;
2746
293
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
2747
293
          XVT, KeptBits))
2748
186
    return SDValue();
2749
107
2750
107
  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
2751
107
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
2752
107
2753
107
  // Unfold into:  ((%x << C) a>> C) cond %x
2754
107
  // Where 'cond' will be either 'eq' or 'ne'.
2755
107
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2756
107
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2757
107
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2758
107
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2759
107
2760
107
  return T2;
2761
107
}
2762
2763
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
2764
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
2765
/// handle the commuted versions of these patterns.
2766
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
2767
                                           ISD::CondCode Cond, const SDLoc &DL,
2768
18.2k
                                           DAGCombinerInfo &DCI) const {
2769
18.2k
  unsigned BOpcode = N0.getOpcode();
2770
18.2k
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
2771
18.2k
         "Unexpected binop");
2772
18.2k
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
2773
18.2k
2774
18.2k
  // (X + Y) == X --> Y == 0
2775
18.2k
  // (X - Y) == X --> Y == 0
2776
18.2k
  // (X ^ Y) == X --> Y == 0
2777
18.2k
  SelectionDAG &DAG = DCI.DAG;
2778
18.2k
  EVT OpVT = N0.getValueType();
2779
18.2k
  SDValue X = N0.getOperand(0);
2780
18.2k
  SDValue Y = N0.getOperand(1);
2781
18.2k
  if (X == N1)
2782
41
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
2783
18.2k
2784
18.2k
  if (Y != N1)
2785
18.2k
    return SDValue();
2786
37
2787
37
  // (X + Y) == Y --> X == 0
2788
37
  // (X ^ Y) == Y --> X == 0
2789
37
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
2790
2
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
2791
35
2792
35
  // The shift would not be valid if the operands are boolean (i1).
2793
35
  if (!N0.hasOneUse() || 
OpVT.getScalarSizeInBits() == 113
)
2794
26
    return SDValue();
2795
9
2796
9
  // (X - Y) == Y --> X == Y << 1
2797
9
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
2798
9
                                 !DCI.isBeforeLegalize());
2799
9
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
2800
9
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
2801
9
  if (!DCI.isCalledByLegalizer())
2802
9
    DCI.AddToWorklist(YShl1.getNode());
2803
9
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
2804
9
}
2805
2806
/// Try to simplify a setcc built with the specified operands and cc. If it is
2807
/// unable to simplify it, return a null SDValue.
2808
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
2809
                                      ISD::CondCode Cond, bool foldBooleans,
2810
                                      DAGCombinerInfo &DCI,
2811
1.24M
                                      const SDLoc &dl) const {
2812
1.24M
  SelectionDAG &DAG = DCI.DAG;
2813
1.24M
  EVT OpVT = N0.getValueType();
2814
1.24M
2815
1.24M
  // Constant fold or commute setcc.
2816
1.24M
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
2817
223
    return Fold;
2818
1.23M
2819
1.23M
  // Ensure that the constant occurs on the RHS and fold constant comparisons.
2820
1.23M
  // TODO: Handle non-splat vector constants. All undef causes trouble.
2821
1.23M
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
2822
1.23M
  if (isConstOrConstSplat(N0) &&
2823
1.23M
      
(2.21k
DCI.isBeforeLegalizeOps()2.21k
||
2824
2.21k
       
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())1.76k
))
2825
487
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
2826
1.23M
2827
1.23M
  // If we have a subtract with the same 2 non-constant operands as this setcc
2828
1.23M
  // -- but in reverse order -- then try to commute the operands of this setcc
2829
1.23M
  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
2830
1.23M
  // instruction on some targets.
2831
1.23M
  if (!isConstOrConstSplat(N0) && 
!isConstOrConstSplat(N1)1.23M
&&
2832
1.23M
      
(254k
DCI.isBeforeLegalizeOps()254k
||
2833
254k
       
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())19.1k
) &&
2834
1.23M
      
DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } )250k
&&
2835
1.23M
      
!DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )530
)
2836
55
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
2837
1.23M
2838
1.23M
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2839
957k
    const APInt &C1 = N1C->getAPIntValue();
2840
957k
2841
957k
    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
2842
957k
    // equality comparison, then we're just comparing whether X itself is
2843
957k
    // zero.
2844
957k
    if (N0.getOpcode() == ISD::SRL && 
(8.70k
C1.isNullValue()8.70k
||
C1.isOneValue()3.98k
) &&
2845
957k
        
N0.getOperand(0).getOpcode() == ISD::CTLZ7.17k
&&
2846
957k
        
N0.getOperand(1).getOpcode() == ISD::Constant0
) {
2847
0
      const APInt &ShAmt = N0.getConstantOperandAPInt(1);
2848
0
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2849
0
          ShAmt == Log2_32(N0.getValueSizeInBits())) {
2850
0
        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
2851
0
          // (srl (ctlz x), 5) == 0  -> X != 0
2852
0
          // (srl (ctlz x), 5) != 1  -> X != 0
2853
0
          Cond = ISD::SETNE;
2854
0
        } else {
2855
0
          // (srl (ctlz x), 5) != 0  -> X == 0
2856
0
          // (srl (ctlz x), 5) == 1  -> X == 0
2857
0
          Cond = ISD::SETEQ;
2858
0
        }
2859
0
        SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
2860
0
        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
2861
0
                            Zero, Cond);
2862
0
      }
2863
957k
    }
2864
957k
2865
957k
    SDValue CTPOP = N0;
2866
957k
    // Look through truncs that don't change the value of a ctpop.
2867
957k
    if (N0.hasOneUse() && 
N0.getOpcode() == ISD::TRUNCATE564k
)
2868
39.5k
      CTPOP = N0.getOperand(0);
2869
957k
2870
957k
    if (CTPOP.hasOneUse() && 
CTPOP.getOpcode() == ISD::CTPOP554k
&&
2871
957k
        
(2.08k
N0 == CTPOP2.08k
||
2872
2.08k
         
N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits())4
)) {
2873
2.08k
      EVT CTVT = CTPOP.getValueType();
2874
2.08k
      SDValue CTOp = CTPOP.getOperand(0);
2875
2.08k
2876
2.08k
      // (ctpop x) u< 2 -> (x & x-1) == 0
2877
2.08k
      // (ctpop x) u> 1 -> (x & x-1) != 0
2878
2.08k
      if ((Cond == ISD::SETULT && 
C1 == 21.73k
) ||
(349
Cond == ISD::SETUGT349
&&
C1 == 1168
)){
2879
1.90k
        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
2880
1.90k
                                  DAG.getConstant(1, dl, CTVT));
2881
1.90k
        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
2882
1.90k
        ISD::CondCode CC = Cond == ISD::SETULT ? 
ISD::SETEQ1.73k
:
ISD::SETNE168
;
2883
1.90k
        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
2884
1.90k
      }
2885
181
2886
181
      // If ctpop is not supported, expand a power-of-2 comparison based on it.
2887
181
      if (C1 == 1 && 
!isOperationLegalOrCustom(ISD::CTPOP, CTVT)112
&&
2888
181
          
(53
Cond == ISD::SETEQ53
||
Cond == ISD::SETNE49
)) {
2889
31
        // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
2890
31
        // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
2891
31
        SDValue Zero = DAG.getConstant(0, dl, CTVT);
2892
31
        SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
2893
31
        ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
2894
31
        SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
2895
31
        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
2896
31
        SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
2897
31
        SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
2898
31
        unsigned LogicOpcode = Cond == ISD::SETEQ ? 
ISD::AND4
:
ISD::OR27
;
2899
31
        return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
2900
31
      }
2901
955k
    }
2902
955k
2903
955k
    // (zext x) == C --> x == (trunc C)
2904
955k
    // (sext x) == C --> x == (trunc C)
2905
955k
    if ((Cond == ISD::SETEQ || 
Cond == ISD::SETNE469k
) &&
2906
955k
        
DCI.isBeforeLegalize()739k
&&
N0->hasOneUse()443k
) {
2907
240k
      unsigned MinBits = N0.getValueSizeInBits();
2908
240k
      SDValue PreExt;
2909
240k
      bool Signed = false;
2910
240k
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
2911
931
        // ZExt
2912
931
        MinBits = N0->getOperand(0).getValueSizeInBits();
2913
931
        PreExt = N0->getOperand(0);
2914
239k
      } else if (N0->getOpcode() == ISD::AND) {
2915
28.6k
        // DAGCombine turns costly ZExts into ANDs
2916
28.6k
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
2917
23.1k
          if ((C->getAPIntValue()+1).isPowerOf2()) {
2918
15.6k
            MinBits = C->getAPIntValue().countTrailingOnes();
2919
15.6k
            PreExt = N0->getOperand(0);
2920
15.6k
          }
2921
211k
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
2922
95
        // SExt
2923
95
        MinBits = N0->getOperand(0).getValueSizeInBits();
2924
95
        PreExt = N0->getOperand(0);
2925
95
        Signed = true;
2926
211k
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
2927
58.1k
        // ZEXTLOAD / SEXTLOAD
2928
58.1k
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
2929
29
          MinBits = LN0->getMemoryVT().getSizeInBits();
2930
29
          PreExt = N0;
2931
58.1k
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
2932
49
          Signed = true;
2933
49
          MinBits = LN0->getMemoryVT().getSizeInBits();
2934
49
          PreExt = N0;
2935
49
        }
2936
58.1k
      }
2937
240k
2938
240k
      // Figure out how many bits we need to preserve this constant.
2939
240k
      unsigned ReqdBits = Signed ?
2940
144
        C1.getBitWidth() - C1.getNumSignBits() + 1 :
2941
240k
        
C1.getActiveBits()240k
;
2942
240k
2943
240k
      // Make sure we're not losing bits from the constant.
2944
240k
      if (MinBits > 0 &&
2945
240k
          MinBits < C1.getBitWidth() &&
2946
240k
          
MinBits >= ReqdBits16.7k
) {
2947
16.6k
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
2948
16.6k
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
2949
641
          // Will get folded away.
2950
641
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
2951
641
          if (MinBits == 1 && 
C1 == 1148
)
2952
0
            // Invert the condition.
2953
0
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
2954
0
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2955
641
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
2956
641
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
2957
641
        }
2958
16.0k
2959
16.0k
        // If truncating the setcc operands is not desirable, we can still
2960
16.0k
        // simplify the expression in some cases:
2961
16.0k
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
2962
16.0k
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
2963
16.0k
        // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
2964
16.0k
        // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
2965
16.0k
        // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
2966
16.0k
        // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
2967
16.0k
        SDValue TopSetCC = N0->getOperand(0);
2968
16.0k
        unsigned N0Opc = N0->getOpcode();
2969
16.0k
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
2970
16.0k
        if (TopSetCC.getValueType() == MVT::i1 && 
VT == MVT::i1249
&&
2971
16.0k
            
TopSetCC.getOpcode() == ISD::SETCC248
&&
2972
16.0k
            
(234
N0Opc == ISD::ZERO_EXTEND234
||
N0Opc == ISD::SIGN_EXTEND10
) &&
2973
16.0k
            
(234
isConstFalseVal(N1C)234
||
2974
234
             
isExtendedTrueVal(N1C, N0->getValueType(0), SExt)9
)) {
2975
231
2976
231
          bool Inverse = (N1C->isNullValue() && 
Cond == ISD::SETEQ225
) ||
2977
231
                         
(112
!N1C->isNullValue()112
&&
Cond == ISD::SETNE6
);
2978
231
2979
231
          if (!Inverse)
2980
107
            return TopSetCC;
2981
124
2982
124
          ISD::CondCode InvCond = ISD::getSetCCInverse(
2983
124
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
2984
124
              TopSetCC.getOperand(0).getValueType().isInteger());
2985
124
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
2986
124
                                      TopSetCC.getOperand(1),
2987
124
                                      InvCond);
2988
124
        }
2989
16.0k
      }
2990
240k
    }
2991
954k
2992
954k
    // If the LHS is '(and load, const)', the RHS is 0, the test is for
2993
954k
    // equality or unsigned, and all 1 bits of the const are in the same
2994
954k
    // partial word, see if we can shorten the load.
2995
954k
    if (DCI.isBeforeLegalize() &&
2996
954k
        
!ISD::isSignedIntSetCC(Cond)602k
&&
2997
954k
        
N0.getOpcode() == ISD::AND516k
&&
C1 == 035.9k
&&
2998
954k
        
N0.getNode()->hasOneUse()33.8k
&&
2999
954k
        
isa<LoadSDNode>(N0.getOperand(0))27.8k
&&
3000
954k
        
N0.getOperand(0).getNode()->hasOneUse()12.6k
&&
3001
954k
        
isa<ConstantSDNode>(N0.getOperand(1))10.9k
) {
3002
10.4k
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3003
10.4k
      APInt bestMask;
3004
10.4k
      unsigned bestWidth = 0, bestOffset = 0;
3005
10.4k
      if (!Lod->isVolatile() && Lod->isUnindexed()) {
3006
10.4k
        unsigned origWidth = N0.getValueSizeInBits();
3007
10.4k
        unsigned maskWidth = origWidth;
3008
10.4k
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3009
10.4k
        // 8 bits, but have to be careful...
3010
10.4k
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3011
0
          origWidth = Lod->getMemoryVT().getSizeInBits();
3012
10.4k
        const APInt &Mask = N0.getConstantOperandAPInt(1);
3013
15.5k
        for (unsigned width = origWidth / 2; width>=8; 
width /= 25.02k
) {
3014
5.02k
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3015
5.45k
          for (unsigned offset=0; offset<origWidth/width; 
offset++427
) {
3016
5.39k
            if (Mask.isSubsetOf(newMask)) {
3017
4.97k
              if (DAG.getDataLayout().isLittleEndian())
3018
4.95k
                bestOffset = (uint64_t)offset * (width/8);
3019
18
              else
3020
18
                bestOffset = (origWidth/width - offset - 1) * (width/8);
3021
4.97k
              bestMask = Mask.lshr(offset * (width/8) * 8);
3022
4.97k
              bestWidth = width;
3023
4.97k
              break;
3024
4.97k
            }
3025
427
            newMask <<= width;
3026
427
          }
3027
5.02k
        }
3028
10.4k
      }
3029
10.4k
      if (bestWidth) {
3030
2.54k
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3031
2.54k
        if (newVT.isRound() &&
3032
2.54k
            
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)2.47k
) {
3033
2.46k
          EVT PtrType = Lod->getOperand(1).getValueType();
3034
2.46k
          SDValue Ptr = Lod->getBasePtr();
3035
2.46k
          if (bestOffset != 0)
3036
94
            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
3037
94
                              DAG.getConstant(bestOffset, dl, PtrType));
3038
2.46k
          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3039
2.46k
          SDValue NewLoad = DAG.getLoad(
3040
2.46k
              newVT, dl, Lod->getChain(), Ptr,
3041
2.46k
              Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3042
2.46k
          return DAG.getSetCC(dl, VT,
3043
2.46k
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3044
2.46k
                                      DAG.getConstant(bestMask.trunc(bestWidth),
3045
2.46k
                                                      dl, newVT)),
3046
2.46k
                              DAG.getConstant(0LL, dl, newVT), Cond);
3047
2.46k
        }
3048
952k
      }
3049
10.4k
    }
3050
952k
3051
952k
    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3052
952k
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3053
2.32k
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3054
2.32k
3055
2.32k
      // If the comparison constant has bits in the upper part, the
3056
2.32k
      // zero-extended value could never match.
3057
2.32k
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3058
2.32k
                                              C1.getBitWidth() - InSize))) {
3059
27
        switch (Cond) {
3060
27
        case ISD::SETUGT:
3061
9
        case ISD::SETUGE:
3062
9
        case ISD::SETEQ:
3063
9
          return DAG.getConstant(0, dl, VT);
3064
12
        case ISD::SETULT:
3065
12
        case ISD::SETULE:
3066
12
        case ISD::SETNE:
3067
12
          return DAG.getConstant(1, dl, VT);
3068
12
        case ISD::SETGT:
3069
1
        case ISD::SETGE:
3070
1
          // True if the sign bit of C1 is set.
3071
1
          return DAG.getConstant(C1.isNegative(), dl, VT);
3072
5
        case ISD::SETLT:
3073
5
        case ISD::SETLE:
3074
5
          // True if the sign bit of C1 isn't set.
3075
5
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
3076
5
        default:
3077
0
          break;
3078
2.29k
        }
3079
2.29k
      }
3080
2.29k
3081
2.29k
      // Otherwise, we can perform the comparison with the low bits.
3082
2.29k
      switch (Cond) {
3083
2.29k
      case ISD::SETEQ:
3084
2.26k
      case ISD::SETNE:
3085
2.26k
      case ISD::SETUGT:
3086
2.26k
      case ISD::SETUGE:
3087
2.26k
      case ISD::SETULT:
3088
2.26k
      case ISD::SETULE: {
3089
2.26k
        EVT newVT = N0.getOperand(0).getValueType();
3090
2.26k
        if (DCI.isBeforeLegalizeOps() ||
3091
2.26k
            
(42
isOperationLegal(ISD::SETCC, newVT)42
&&
3092
2.22k
             
isCondCodeLegal(Cond, newVT.getSimpleVT())2
)) {
3093
2.22k
          EVT NewSetCCVT =
3094
2.22k
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
3095
2.22k
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3096
2.22k
3097
2.22k
          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3098
2.22k
                                          NewConst, Cond);
3099
2.22k
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3100
2.22k
        }
3101
40
        break;
3102
40
      }
3103
40
      default:
3104
34
        break; // todo, be more careful with signed comparisons
3105
949k
      }
3106
949k
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3107
949k
               
(1.03k
Cond == ISD::SETEQ1.03k
||
Cond == ISD::SETNE1.00k
)) {
3108
44
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3109
44
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3110
44
      EVT ExtDstTy = N0.getValueType();
3111
44
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3112
44
3113
44
      // If the constant doesn't fit into the number of bits for the source of
3114
44
      // the sign extension, it is impossible for both sides to be equal.
3115
44
      if (C1.getMinSignedBits() > ExtSrcTyBits)
3116
2
        return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3117
42
3118
42
      SDValue ZextOp;
3119
42
      EVT Op0Ty = N0.getOperand(0).getValueType();
3120
42
      if (Op0Ty == ExtSrcTy) {
3121
0
        ZextOp = N0.getOperand(0);
3122
42
      } else {
3123
42
        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3124
42
        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3125
42
                             DAG.getConstant(Imm, dl, Op0Ty));
3126
42
      }
3127
42
      if (!DCI.isCalledByLegalizer())
3128
42
        DCI.AddToWorklist(ZextOp.getNode());
3129
42
      // Otherwise, make this a use of a zext.
3130
42
      return DAG.getSetCC(dl, VT, ZextOp,
3131
42
                          DAG.getConstant(C1 & APInt::getLowBitsSet(
3132
42
                                                              ExtDstTyBits,
3133
42
                                                              ExtSrcTyBits),
3134
42
                                          dl, ExtDstTy),
3135
42
                          Cond);
3136
949k
    } else if ((N1C->isNullValue() || 
N1C->isOne()275k
) &&
3137
949k
                
(756k
Cond == ISD::SETEQ756k
||
Cond == ISD::SETNE308k
)) {
3138
669k
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3139
669k
      if (N0.getOpcode() == ISD::SETCC &&
3140
669k
          
isTypeLegal(VT)298
&&
VT.bitsLE(N0.getValueType())288
) {
3141
284
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3142
284
        if (TrueWhenTrue)
3143
180
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3144
104
        // Invert the condition.
3145
104
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3146
104
        CC = ISD::getSetCCInverse(CC,
3147
104
                                  N0.getOperand(0).getValueType().isInteger());
3148
104
        if (DCI.isBeforeLegalizeOps() ||
3149
104
            
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())30
)
3150
104
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3151
669k
      }
3152
669k
3153
669k
      if ((N0.getOpcode() == ISD::XOR ||
3154
669k
           
(668k
N0.getOpcode() == ISD::AND668k
&&
3155
668k
            
N0.getOperand(0).getOpcode() == ISD::XOR67.9k
&&
3156
668k
            
N0.getOperand(1) == N0.getOperand(0).getOperand(1)2.54k
)) &&
3157
669k
          
isa<ConstantSDNode>(N0.getOperand(1))528
&&
3158
669k
          
cast<ConstantSDNode>(N0.getOperand(1))->isOne()208
) {
3159
23
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3160
23
        // can only do this if the top bits are known zero.
3161
23
        unsigned BitWidth = N0.getValueSizeInBits();
3162
23
        if (DAG.MaskedValueIsZero(N0,
3163
23
                                  APInt::getHighBitsSet(BitWidth,
3164
23
                                                        BitWidth-1))) {
3165
15
          // Okay, get the un-inverted input value.
3166
15
          SDValue Val;
3167
15
          if (N0.getOpcode() == ISD::XOR) {
3168
15
            Val = N0.getOperand(0);
3169
15
          } else {
3170
0
            assert(N0.getOpcode() == ISD::AND &&
3171
0
                    N0.getOperand(0).getOpcode() == ISD::XOR);
3172
0
            // ((X^1)&1)^1 -> X & 1
3173
0
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3174
0
                              N0.getOperand(0).getOperand(0),
3175
0
                              N0.getOperand(1));
3176
0
          }
3177
15
3178
15
          return DAG.getSetCC(dl, VT, Val, N1,
3179
15
                              Cond == ISD::SETEQ ? 
ISD::SETNE11
:
ISD::SETEQ4
);
3180
15
        }
3181
669k
      } else if (N1C->isOne() &&
3182
669k
                 
(35.7k
VT == MVT::i135.7k
||
3183
35.7k
                  getBooleanContents(N0->getValueType(0)) ==
3184
35.7k
                      ZeroOrOneBooleanContent)) {
3185
35.7k
        SDValue Op0 = N0;
3186
35.7k
        if (Op0.getOpcode() == ISD::TRUNCATE)
3187
8.39k
          Op0 = Op0.getOperand(0);
3188
35.7k
3189
35.7k
        if ((Op0.getOpcode() == ISD::XOR) &&
3190
35.7k
            
Op0.getOperand(0).getOpcode() == ISD::SETCC162
&&
3191
35.7k
            
Op0.getOperand(1).getOpcode() == ISD::SETCC156
) {
3192
156
          // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3193
156
          Cond = (Cond == ISD::SETEQ) ? 
ISD::SETNE0
: ISD::SETEQ;
3194
156
          return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
3195
156
                              Cond);
3196
156
        }
3197
35.6k
        if (Op0.getOpcode() == ISD::AND &&
3198
35.6k
            
isa<ConstantSDNode>(Op0.getOperand(1))5.73k
&&
3199
35.6k
            
cast<ConstantSDNode>(Op0.getOperand(1))->isOne()5.65k
) {
3200
5.58k
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3201
5.58k
          if (Op0.getValueType().bitsGT(VT))
3202
86
            Op0 = DAG.getNode(ISD::AND, dl, VT,
3203
86
                          DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3204
86
                          DAG.getConstant(1, dl, VT));
3205
5.49k
          else if (Op0.getValueType().bitsLT(VT))
3206
0
            Op0 = DAG.getNode(ISD::AND, dl, VT,
3207
0
                        DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3208
0
                        DAG.getConstant(1, dl, VT));
3209
5.58k
3210
5.58k
          return DAG.getSetCC(dl, VT, Op0,
3211
5.58k
                              DAG.getConstant(0, dl, Op0.getValueType()),
3212
5.58k
                              Cond == ISD::SETEQ ? 
ISD::SETNE44
:
ISD::SETEQ5.54k
);
3213
5.58k
        }
3214
30.0k
        if (Op0.getOpcode() == ISD::AssertZext &&
3215
30.0k
            
cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i18.16k
)
3216
7.83k
          return DAG.getSetCC(dl, VT, Op0,
3217
7.83k
                              DAG.getConstant(0, dl, Op0.getValueType()),
3218
7.83k
                              Cond == ISD::SETEQ ? 
ISD::SETNE15
:
ISD::SETEQ7.82k
);
3219
936k
      }
3220
669k
    }
3221
936k
3222
936k
    // Given:
3223
936k
    //   icmp eq/ne (urem %x, %y), 0
3224
936k
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3225
936k
    //   icmp eq/ne %x, 0
3226
936k
    if (N0.getOpcode() == ISD::UREM && 
N1C->isNullValue()301
&&
3227
936k
        
(252
Cond == ISD::SETEQ252
||
Cond == ISD::SETNE93
)) {
3228
251
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3229
251
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3230
251
      if (XKnown.countMaxPopulation() == 1 && 
YKnown.countMinPopulation() >= 25
)
3231
3
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3232
936k
    }
3233
936k
3234
936k
    if (SDValue V =
3235
107
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3236
107
      return V;
3237
1.21M
  }
3238
1.21M
3239
1.21M
  // These simplifications apply to splat vectors as well.
3240
1.21M
  // TODO: Handle more splat vector cases.
3241
1.21M
  if (auto *N1C = isConstOrConstSplat(N1)) {
3242
961k
    const APInt &C1 = N1C->getAPIntValue();
3243
961k
3244
961k
    APInt MinVal, MaxVal;
3245
961k
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3246
961k
    if (ISD::isSignedIntSetCC(Cond)) {
3247
113k
      MinVal = APInt::getSignedMinValue(OperandBitSize);
3248
113k
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3249
848k
    } else {
3250
848k
      MinVal = APInt::getMinValue(OperandBitSize);
3251
848k
      MaxVal = APInt::getMaxValue(OperandBitSize);
3252
848k
    }
3253
961k
3254
961k
    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3255
961k
    if (Cond == ISD::SETGE || 
Cond == ISD::SETUGE949k
) {
3256
22.6k
      // X >= MIN --> true
3257
22.6k
      if (C1 == MinVal)
3258
218
        return DAG.getBoolConstant(true, dl, VT, OpVT);
3259
22.4k
3260
22.4k
      if (!VT.isVector()) { // TODO: Support this for vectors.
3261
22.2k
        // X >= C0 --> X > (C0 - 1)
3262
22.2k
        APInt C = C1 - 1;
3263
22.2k
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? 
ISD::SETGT11.6k
:
ISD::SETUGT10.6k
;
3264
22.2k
        if ((DCI.isBeforeLegalizeOps() ||
3265
22.2k
             
isCondCodeLegal(NewCC, VT.getSimpleVT())293
) &&
3266
22.2k
            (!N1C->isOpaque() || 
(85
C.getBitWidth() <= 6485
&&
3267
22.1k
                                  
isLegalICmpImmediate(C.getSExtValue())85
))) {
3268
22.1k
          return DAG.getSetCC(dl, VT, N0,
3269
22.1k
                              DAG.getConstant(C, dl, N1.getValueType()),
3270
22.1k
                              NewCC);
3271
22.1k
        }
3272
939k
      }
3273
22.4k
    }
3274
939k
3275
939k
    if (Cond == ISD::SETLE || 
Cond == ISD::SETULE921k
) {
3276
26.6k
      // X <= MAX --> true
3277
26.6k
      if (C1 == MaxVal)
3278
7
        return DAG.getBoolConstant(true, dl, VT, OpVT);
3279
26.5k
3280
26.5k
      // X <= C0 --> X < (C0 + 1)
3281
26.5k
      if (!VT.isVector()) { // TODO: Support this for vectors.
3282
26.4k
        APInt C = C1 + 1;
3283
26.4k
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT17.4k
:
ISD::SETULT8.97k
;
3284
26.4k
        if ((DCI.isBeforeLegalizeOps() ||
3285
26.4k
             
isCondCodeLegal(NewCC, VT.getSimpleVT())117
) &&
3286
26.4k
            (!N1C->isOpaque() || 
(574
C.getBitWidth() <= 64574
&&
3287
25.9k
                                  
isLegalICmpImmediate(C.getSExtValue())574
))) {
3288
25.9k
          return DAG.getSetCC(dl, VT, N0,
3289
25.9k
                              DAG.getConstant(C, dl, N1.getValueType()),
3290
25.9k
                              NewCC);
3291
25.9k
        }
3292
913k
      }
3293
26.5k
    }
3294
913k
3295
913k
    if (Cond == ISD::SETLT || 
Cond == ISD::SETULT861k
) {
3296
95.2k
      if (C1 == MinVal)
3297
195
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3298
95.0k
3299
95.0k
      // TODO: Support this for vectors after legalize ops.
3300
95.0k
      if (!VT.isVector() || 
DCI.isBeforeLegalizeOps()2.65k
) {
3301
94.9k
        // Canonicalize setlt X, Max --> setne X, Max
3302
94.9k
        if (C1 == MaxVal)
3303
46
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3304
94.9k
3305
94.9k
        // If we have setult X, 1, turn it into seteq X, 0
3306
94.9k
        if (C1 == MinVal+1)
3307
1.36k
          return DAG.getSetCC(dl, VT, N0,
3308
1.36k
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
3309
1.36k
                              ISD::SETEQ);
3310
911k
      }
3311
95.0k
    }
3312
911k
3313
911k
    if (Cond == ISD::SETGT || 
Cond == ISD::SETUGT879k
) {
3314
73.4k
      if (C1 == MaxVal)
3315
221
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3316
73.2k
3317
73.2k
      // TODO: Support this for vectors after legalize ops.
3318
73.2k
      if (!VT.isVector() || 
DCI.isBeforeLegalizeOps()404
) {
3319
73.2k
        // Canonicalize setgt X, Min --> setne X, Min
3320
73.2k
        if (C1 == MinVal)
3321
1.86k
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3322
71.3k
3323
71.3k
        // If we have setugt X, Max-1, turn it into seteq X, Max
3324
71.3k
        if (C1 == MaxVal-1)
3325
18
          return DAG.getSetCC(dl, VT, N0,
3326
18
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
3327
18
                              ISD::SETEQ);
3328
909k
      }
3329
73.2k
    }
3330
909k
3331
909k
    // If we have "setcc X, C0", check to see if we can shrink the immediate
3332
909k
    // by changing cc.
3333
909k
    // TODO: Support this for vectors after legalize ops.
3334
909k
    if (!VT.isVector() || 
DCI.isBeforeLegalizeOps()25.5k
) {
3335
905k
      // SETUGT X, SINTMAX  -> SETLT X, 0
3336
905k
      if (Cond == ISD::SETUGT &&
3337
905k
          
C1 == APInt::getSignedMaxValue(OperandBitSize)38.8k
)
3338
1
        return DAG.getSetCC(dl, VT, N0,
3339
1
                            DAG.getConstant(0, dl, N1.getValueType()),
3340
1
                            ISD::SETLT);
3341
905k
3342
905k
      // SETULT X, SINTMIN  -> SETGT X, -1
3343
905k
      if (Cond == ISD::SETULT &&
3344
905k
          
C1 == APInt::getSignedMinValue(OperandBitSize)42.1k
) {
3345
26
        SDValue ConstMinusOne =
3346
26
            DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3347
26
                            N1.getValueType());
3348
26
        return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3349
26
      }
3350
1.16M
    }
3351
909k
  }
3352
1.16M
3353
1.16M
  // Back to non-vector simplifications.
3354
1.16M
  // TODO: Can we do these for vector splats?
3355
1.16M
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3356
883k
    const APInt &C1 = N1C->getAPIntValue();
3357
883k
3358
883k
    // Fold bit comparisons when we can.
3359
883k
    if ((Cond == ISD::SETEQ || 
Cond == ISD::SETNE401k
) &&
3360
883k
        
(721k
VT == N0.getValueType()721k
||
3361
721k
         
(396k
isTypeLegal(VT)396k
&&
VT.bitsLE(N0.getValueType())207k
)) &&
3362
883k
        
N0.getOpcode() == ISD::AND525k
) {
3363
44.6k
      auto &DL = DAG.getDataLayout();
3364
44.6k
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3365
38.7k
        EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3366
38.7k
                                       !DCI.isBeforeLegalize());
3367
38.7k
        if (Cond == ISD::SETNE && 
C1 == 06.91k
) {// (X & 8) != 0 --> (X & 8) >> 3
3368
6.12k
          // Perform the xform if the AND RHS is a single bit.
3369
6.12k
          if (AndRHS->getAPIntValue().isPowerOf2()) {
3370
2.95k
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
3371
2.95k
                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
3372
2.95k
                   DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
3373
2.95k
                                   ShiftTy)));
3374
2.95k
          }
3375
32.6k
        } else if (Cond == ISD::SETEQ && 
C1 == AndRHS->getAPIntValue()31.8k
) {
3376
158
          // (X & 8) == 8  -->  (X & 8) >> 3
3377
158
          // Perform the xform if C1 is a single bit.
3378
158
          if (C1.isPowerOf2()) {
3379
0
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
3380
0
                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
3381
0
                                      DAG.getConstant(C1.logBase2(), dl,
3382
0
                                                      ShiftTy)));
3383
0
          }
3384
880k
        }
3385
38.7k
      }
3386
44.6k
    }
3387
880k
3388
880k
    if (C1.getMinSignedBits() <= 64 &&
3389
880k
        
!isLegalICmpImmediate(C1.getSExtValue())880k
) {
3390
18.4k
      // (X & -256) == 256 -> (X >> 8) == 1
3391
18.4k
      if ((Cond == ISD::SETEQ || 
Cond == ISD::SETNE14.3k
) &&
3392
18.4k
          
N0.getOpcode() == ISD::AND4.80k
&&
N0.hasOneUse()313
) {
3393
173
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3394
173
          const APInt &AndRHSC = AndRHS->getAPIntValue();
3395
173
          if ((-AndRHSC).isPowerOf2() && 
(AndRHSC & C1) == C183
) {
3396
83
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
3397
83
            auto &DL = DAG.getDataLayout();
3398
83
            EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3399
83
                                           !DCI.isBeforeLegalize());
3400
83
            EVT CmpTy = N0.getValueType();
3401
83
            SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
3402
83
                                        DAG.getConstant(ShiftBits, dl,
3403
83
                                                        ShiftTy));
3404
83
            SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
3405
83
            return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3406
83
          }
3407
18.2k
        }
3408
18.2k
      } else if (Cond == ISD::SETULT || 
Cond == ISD::SETUGE13.1k
||
3409
18.2k
                 
Cond == ISD::SETULE13.0k
||
Cond == ISD::SETUGT12.5k
) {
3410
8.44k
        bool AdjOne = (Cond == ISD::SETULE || 
Cond == ISD::SETUGT7.91k
);
3411
8.44k
        // X <  0x100000000 -> (X >> 32) <  1
3412
8.44k
        // X >= 0x100000000 -> (X >> 32) >= 1
3413
8.44k
        // X <= 0x0ffffffff -> (X >> 32) <  1
3414
8.44k
        // X >  0x0ffffffff -> (X >> 32) >= 1
3415
8.44k
        unsigned ShiftBits;
3416
8.44k
        APInt NewC = C1;
3417
8.44k
        ISD::CondCode NewCond = Cond;
3418
8.44k
        if (AdjOne) {
3419
3.24k
          ShiftBits = C1.countTrailingOnes();
3420
3.24k
          NewC = NewC + 1;
3421
3.24k
          NewCond = (Cond == ISD::SETULE) ? 
ISD::SETULT534
:
ISD::SETUGE2.70k
;
3422
5.20k
        } else {
3423
5.20k
          ShiftBits = C1.countTrailingZeros();
3424
5.20k
        }
3425
8.44k
        NewC.lshrInPlace(ShiftBits);
3426
8.44k
        if (ShiftBits && 
NewC.getMinSignedBits() <= 643.58k
&&
3427
8.44k
          
isLegalICmpImmediate(NewC.getSExtValue())3.57k
) {
3428
2.98k
          auto &DL = DAG.getDataLayout();
3429
2.98k
          EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3430
2.98k
                                         !DCI.isBeforeLegalize());
3431
2.98k
          EVT CmpTy = N0.getValueType();
3432
2.98k
          SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
3433
2.98k
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
3434
2.98k
          SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
3435
2.98k
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3436
2.98k
        }
3437
1.15M
      }
3438
18.4k
    }
3439
880k
  }
3440
1.15M
3441
1.15M
  if (!isa<ConstantFPSDNode>(N0) && 
isa<ConstantFPSDNode>(N1)1.15M
) {
3442
11.5k
    auto *CFP = cast<ConstantFPSDNode>(N1);
3443
11.5k
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3444
11.5k
3445
11.5k
    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3446
11.5k
    // constant if knowing that the operand is non-nan is enough.  We prefer to
3447
11.5k
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3448
11.5k
    // materialize 0.0.
3449
11.5k
    if (Cond == ISD::SETO || 
Cond == ISD::SETUO11.0k
)
3450
1.15k
      return DAG.getSetCC(dl, VT, N0, N0, Cond);
3451
10.3k
3452
10.3k
    // setcc (fneg x), C -> setcc swap(pred) x, -C
3453
10.3k
    if (N0.getOpcode() == ISD::FNEG) {
3454
35
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3455
35
      if (DCI.isBeforeLegalizeOps() ||
3456
35
          
isCondCodeLegal(SwapCond, N0.getSimpleValueType())1
) {
3457
35
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3458
35
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3459
35
      }
3460
10.3k
    }
3461
10.3k
3462
10.3k
    // If the condition is not legal, see if we can find an equivalent one
3463
10.3k
    // which is legal.
3464
10.3k
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3465
899
      // If the comparison was an awkward floating-point == or != and one of
3466
899
      // the comparison operands is infinity or negative infinity, convert the
3467
899
      // condition to a less-awkward <= or >=.
3468
899
      if (CFP->getValueAPF().isInfinity()) {
3469
496
        if (CFP->getValueAPF().isNegative()) {
3470
4
          if (Cond == ISD::SETOEQ &&
3471
4
              
isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())2
)
3472
2
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3473
2
          if (Cond == ISD::SETUEQ &&
3474
2
              
isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())0
)
3475
0
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3476
2
          if (Cond == ISD::SETUNE &&
3477
2
              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3478
2
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3479
0
          if (Cond == ISD::SETONE &&
3480
0
              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3481
0
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3482
492
        } else {
3483
492
          if (Cond == ISD::SETOEQ &&
3484
492
              
isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())387
)
3485
387
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3486
105
          if (Cond == ISD::SETUEQ &&
3487
105
              
isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())0
)
3488
0
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3489
105
          if (Cond == ISD::SETUNE &&
3490
105
              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3491
105
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3492
0
          if (Cond == ISD::SETONE &&
3493
0
              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3494
0
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3495
1.15M
        }
3496
496
      }
3497
899
    }
3498
10.3k
  }
3499
1.15M
3500
1.15M
  if (N0 == N1) {
3501
2.28k
    // The sext(setcc()) => setcc() optimization relies on the appropriate
3502
2.28k
    // constant being emitted.
3503
2.28k
    assert(!N0.getValueType().isInteger() &&
3504
2.28k
           "Integer types should be handled by FoldSetCC");
3505
2.28k
3506
2.28k
    bool EqTrue = ISD::isTrueWhenEqual(Cond);
3507
2.28k
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
3508
2.28k
    if (UOF == 2) // FP operators that are undefined on NaNs.
3509
0
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3510
2.28k
    if (UOF == unsigned(EqTrue))
3511
18
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3512
2.26k
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
3513
2.26k
    // if it is not already.
3514
2.26k
    ISD::CondCode NewCond = UOF == 0 ? 
ISD::SETO911
:
ISD::SETUO1.35k
;
3515
2.26k
    if (NewCond != Cond &&
3516
2.26k
        
(368
DCI.isBeforeLegalizeOps()368
||
3517
368
                            
isCondCodeLegal(NewCond, N0.getSimpleValueType())344
))
3518
24
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3519
1.15M
  }
3520
1.15M
3521
1.15M
  if ((Cond == ISD::SETEQ || 
Cond == ISD::SETNE619k
) &&
3522
1.15M
      
N0.getValueType().isInteger()818k
) {
3523
818k
    if (N0.getOpcode() == ISD::ADD || 
N0.getOpcode() == ISD::SUB756k
||
3524
818k
        
N0.getOpcode() == ISD::XOR754k
) {
3525
64.8k
      // Simplify (X+Y) == (X+Z) -->  Y == Z
3526
64.8k
      if (N0.getOpcode() == N1.getOpcode()) {
3527
120
        if (N0.getOperand(0) == N1.getOperand(0))
3528
0
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3529
120
        if (N0.getOperand(1) == N1.getOperand(1))
3530
9
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3531
111
        if (isCommutativeBinOp(N0.getOpcode())) {
3532
87
          // If X op Y == Y op X, try other combinations.
3533
87
          if (N0.getOperand(0) == N1.getOperand(1))
3534
0
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3535
0
                                Cond);
3536
87
          if (N0.getOperand(1) == N1.getOperand(0))
3537
0
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3538
0
                                Cond);
3539
64.7k
        }
3540
111
      }
3541
64.7k
3542
64.7k
      // If RHS is a legal immediate value for a compare instruction, we need
3543
64.7k
      // to be careful about increasing register pressure needlessly.
3544
64.7k
      bool LegalRHSImm = false;
3545
64.7k
3546
64.7k
      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3547
58.5k
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3548
55.1k
          // Turn (X+C1) == C2 --> X == C2-C1
3549
55.1k
          if (N0.getOpcode() == ISD::ADD && 
N0.getNode()->hasOneUse()54.9k
) {
3550
70
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
3551
70
                                DAG.getConstant(RHSC->getAPIntValue()-
3552
70
                                                LHSR->getAPIntValue(),
3553
70
                                dl, N0.getValueType()), Cond);
3554
70
          }
3555
55.1k
3556
55.1k
          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3557
55.1k
          if (N0.getOpcode() == ISD::XOR)
3558
195
            // If we know that all of the inverted bits are zero, don't bother
3559
195
            // performing the inversion.
3560
195
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3561
15
              return
3562
15
                DAG.getSetCC(dl, VT, N0.getOperand(0),
3563
15
                             DAG.getConstant(LHSR->getAPIntValue() ^
3564
15
                                               RHSC->getAPIntValue(),
3565
15
                                             dl, N0.getValueType()),
3566
15
                             Cond);
3567
58.4k
        }
3568
58.4k
3569
58.4k
        // Turn (C1-X) == C2 --> X == C1-C2
3570
58.4k
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3571
41
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3572
16
            return
3573
16
              DAG.getSetCC(dl, VT, N0.getOperand(1),
3574
16
                           DAG.getConstant(SUBC->getAPIntValue() -
3575
16
                                             RHSC->getAPIntValue(),
3576
16
                                           dl, N0.getValueType()),
3577
16
                           Cond);
3578
16
          }
3579
58.4k
        }
3580
58.4k
3581
58.4k
        // Could RHSC fold directly into a compare?
3582
58.4k
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
3583
58.4k
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3584
58.4k
      }
3585
64.7k
3586
64.7k
      // (X+Y) == X --> Y == 0 and similar folds.
3587
64.7k
      // Don't do this if X is an immediate that can fold into a cmp
3588
64.7k
      // instruction and X+Y has other uses. It could be an induction variable
3589
64.7k
      // chain, and the transform would increase register pressure.
3590
64.7k
      
if (64.6k
!LegalRHSImm64.6k
||
N0.hasOneUse()58.1k
)
3591
7.96k
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3592
48
          return V;
3593
818k
    }
3594
818k
3595
818k
    if (N1.getOpcode() == ISD::ADD || 
N1.getOpcode() == ISD::SUB810k
||
3596
818k
        
N1.getOpcode() == ISD::XOR809k
)
3597
10.3k
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3598
4
        return V;
3599
818k
3600
818k
    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3601
411
      return V;
3602
1.15M
  }
3603
1.15M
3604
1.15M
  // Fold remainder of division by a constant.
3605
1.15M
  if (N0.getOpcode() == ISD::UREM && 
N0.hasOneUse()896
&&
3606
1.15M
      
(349
Cond == ISD::SETEQ349
||
Cond == ISD::SETNE92
)) {
3607
346
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3608
346
3609
346
    // When division is cheap or optimizing for minimum size,
3610
346
    // fall through to DIVREM creation by skipping this fold.
3611
346
    if (!isIntDivCheap(VT, Attr) && 
!Attr.hasFnAttribute(Attribute::MinSize)341
)
3612
337
      if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3613
124
        return Folded;
3614
1.15M
  }
3615
1.15M
3616
1.15M
  // Fold away ALL boolean setcc's.
3617
1.15M
  if (N0.getValueType().getScalarType() == MVT::i1 && 
foldBooleans10.3k
) {
3618
176
    SDValue Temp;
3619
176
    switch (Cond) {
3620
176
    
default: 0
llvm_unreachable0
("Unknown integer setcc!");
3621
176
    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
3622
64
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3623
64
      N0 = DAG.getNOT(dl, Temp, OpVT);
3624
64
      if (!DCI.isCalledByLegalizer())
3625
64
        DCI.AddToWorklist(Temp.getNode());
3626
64
      break;
3627
176
    case ISD::SETNE:  // X != Y   -->  (X^Y)
3628
96
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3629
96
      break;
3630
176
    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
3631
6
    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
3632
6
      Temp = DAG.getNOT(dl, N0, OpVT);
3633
6
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3634
6
      if (!DCI.isCalledByLegalizer())
3635
6
        DCI.AddToWorklist(Temp.getNode());
3636
6
      break;
3637
10
    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
3638
10
    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
3639
10
      Temp = DAG.getNOT(dl, N1, OpVT);
3640
10
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3641
10
      if (!DCI.isCalledByLegalizer())
3642
10
        DCI.AddToWorklist(Temp.getNode());
3643
10
      break;
3644
10
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
3645
0
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
3646
0
      Temp = DAG.getNOT(dl, N0, OpVT);
3647
0
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3648
0
      if (!DCI.isCalledByLegalizer())
3649
0
        DCI.AddToWorklist(Temp.getNode());
3650
0
      break;
3651
0
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
3652
0
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
3653
0
      Temp = DAG.getNOT(dl, N1, OpVT);
3654
0
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3655
0
      break;
3656
176
    }
3657
176
    if (VT.getScalarType() != MVT::i1) {
3658
4
      if (!DCI.isCalledByLegalizer())
3659
4
        DCI.AddToWorklist(N0.getNode());
3660
4
      // FIXME: If running after legalize, we probably can't do this.
3661
4
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3662
4
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3663
4
    }
3664
176
    return N0;
3665
176
  }
3666
1.15M
3667
1.15M
  // Could not fold it.
3668
1.15M
  return SDValue();
3669
1.15M
}
3670
3671
/// Returns true (and the GlobalValue and the offset) if the node is a
3672
/// GlobalAddress + offset.
3673
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
3674
68.0M
                                    int64_t &Offset) const {
3675
68.0M
3676
68.0M
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
3677
68.0M
3678
68.0M
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
3679
408k
    GA = GASD->getGlobal();
3680
408k
    Offset += GASD->getOffset();
3681
408k
    return true;
3682
408k
  }
3683
67.6M
3684
67.6M
  if (N->getOpcode() == ISD::ADD) {
3685
29.7M
    SDValue N1 = N->getOperand(0);
3686
29.7M
    SDValue N2 = N->getOperand(1);
3687
29.7M
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
3688
164k
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
3689
140k
        Offset += V->getSExtValue();
3690
140k
        return true;
3691
140k
      }
3692
29.5M
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
3693
7.55k
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
3694
0
        Offset += V->getSExtValue();
3695
0
        return true;
3696
0
      }
3697
67.4M
    }
3698
29.7M
  }
3699
67.4M
3700
67.4M
  return false;
3701
67.4M
}
3702
3703
/// Default implementation of the DAG-combine hook: it performs no
/// optimization and ignores both \p N and \p DCI, returning a
/// default-constructed SDValue.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue NoReplacement; // Default-constructed: nothing was combined.
  return NoReplacement;
}
3708
3709
//===----------------------------------------------------------------------===//
3710
//  Inline Assembler Implementation Methods
3711
//===----------------------------------------------------------------------===//
3712
3713
/// Classify an inline-asm constraint string.
///
/// Single-character constraints are mapped through a fixed table; a
/// constraint of the form "{...}" is a specific register, except for
/// "{memory}", which is treated as a memory constraint. Anything else
/// (including the empty string) yields C_Unknown.
TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  const unsigned Len = Constraint.size();

  if (Len == 1) {
    switch (Constraint[0]) {
    case 'r':
      return C_RegisterClass;

    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;

    case 'i': // Simple Integer or Relocatable Constant
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
    case 's': // Relocatable Constant
    case 'p': // Address.
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;

    default:
      break;
    }
    // An unrecognized single letter can never be a "{...}" constraint.
    return C_Unknown;
  }

  // Only brace-enclosed names remain recognizable from here on.
  const bool IsBraced =
      Len > 1 && Constraint[0] == '{' && Constraint[Len - 1] == '}';
  if (!IsBraced)
    return C_Unknown;

  // "{memory}" is the one braced name that is not a register.
  if (Constraint == "{memory}")
    return C_Memory;
  return C_Register;
}
3753
3754
/// Try to replace an X constraint, which matches anything, with another that
3755
/// has more specific requirements based on the type of the corresponding
3756
/// operand.
3757
133
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
3758
133
  if (ConstraintVT.isInteger())
3759
94
    return "r";
3760
39
  if (ConstraintVT.isFloatingPoint())
3761
36
    return "f"; // works for many targets
3762
3
  return nullptr;
3763
3
}
3764
3765
/// Base implementation for lowering an inline-asm output operand for a
/// constraint: it ignores all of its arguments, leaves \p Chain and \p Flag
/// untouched, and returns a default-constructed SDValue.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  SDValue NoResult; // Default-constructed: nothing was lowered here.
  return NoResult;
}
3770
3771
/// Lower the specified operand into the Ops vector.
3772
/// If it is invalid, don't add anything to Ops.
3773
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3774
                                                  std::string &Constraint,
3775
                                                  std::vector<SDValue> &Ops,
3776
399
                                                  SelectionDAG &DAG) const {
3777
399
3778
399
  if (Constraint.length() > 1) 
return0
;
3779
399
3780
399
  char ConstraintLetter = Constraint[0];
3781
399
  switch (ConstraintLetter) {
3782
399
  
default: break54
;
3783
399
  case 'X':     // Allows any operand; labels (basic block) use this.
3784
34
    if (Op.getOpcode() == ISD::BasicBlock ||
3785
34
        
Op.getOpcode() == ISD::TargetBlockAddress32
) {
3786
12
      Ops.push_back(Op);
3787
12
      return;
3788
12
    }
3789
22
    LLVM_FALLTHROUGH;
3790
333
  case 'i':    // Simple Integer or Relocatable Constant
3791
333
  case 'n':    // Simple Integer
3792
333
  case 's': {  // Relocatable Constant
3793
333
3794
333
    GlobalAddressSDNode *GA;
3795
333
    ConstantSDNode *C;
3796
333
    BlockAddressSDNode *BA;
3797
333
    uint64_t Offset = 0;
3798
333
3799
333
    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
3800
333
    // etc., since getelementpointer is variadic. We can't use
3801
333
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
3802
333
    // while in this case the GA may be furthest from the root node which is
3803
333
    // likely an ISD::ADD.
3804
348
    while (1) {
3805
348
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && 
ConstraintLetter != 'n'70
) {
3806
69
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
3807
69
                                                 GA->getValueType(0),
3808
69
                                                 Offset + GA->getOffset()));
3809
69
        return;
3810
279
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
3811
279
                 
ConstraintLetter != 's'182
) {
3812
182
        // gcc prints these as sign extended.  Sign extend value to 64 bits
3813
182
        // now; without this it would get ZExt'd later in
3814
182
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
3815
182
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
3816
182
        BooleanContent BCont = getBooleanContents(MVT::i64);
3817
182
        ISD::NodeType ExtOpc = IsBool ? 
getExtendForContent(BCont)9
3818
182
                                      : 
ISD::SIGN_EXTEND173
;
3819
182
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? 
C->getZExtValue()9
3820
182
                                                    : 
C->getSExtValue()173
;
3821
182
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
3822
182
                                            SDLoc(C), MVT::i64));
3823
182
        return;
3824
182
      } else 
if (97
(BA = dyn_cast<BlockAddressSDNode>(Op))97
&&
3825
97
                 
ConstraintLetter != 'n'1
) {
3826
1
        Ops.push_back(DAG.getTargetBlockAddress(
3827
1
            BA->getBlockAddress(), BA->getValueType(0),
3828
1
            Offset + BA->getOffset(), BA->getTargetFlags()));
3829
1
        return;
3830
96
      } else {
3831
96
        const unsigned OpCode = Op.getOpcode();
3832
96
        if (OpCode == ISD::ADD || 
OpCode == ISD::SUB80
) {
3833
20
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
3834
0
            Op = Op.getOperand(1);
3835
20
          // Subtraction is not commutative.
3836
20
          else if (OpCode == ISD::ADD &&
3837
20
                   
(C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))16
)
3838
15
            Op = Op.getOperand(0);
3839
5
          else
3840
5
            return;
3841
15
          Offset += (OpCode == ISD::ADD ? 1 : 
-10
) * C->getSExtValue();
3842
15
          continue;
3843
15
        }
3844
96
      }
3845
76
      return;
3846
76
    }
3847
333
    
break0
;
3848
333
  }
3849
399
  }
3850
399
}
3851
3852
std::pair<unsigned, const TargetRegisterClass *>
3853
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
3854
                                             StringRef Constraint,
3855
175k
                                             MVT VT) const {
3856
175k
  if (Constraint.empty() || 
Constraint[0] != '{'175k
)
3857
598
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
3858
175k
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
3859
175k
3860
175k
  // Remove the braces from around the name.
3861
175k
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
3862
175k
3863
175k
  std::pair<unsigned, const TargetRegisterClass *> R =
3864
175k
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
3865
175k
3866
175k
  // Figure out which register class contains this reg.
3867
20.0M
  for (const TargetRegisterClass *RC : RI->regclasses()) {
3868
20.0M
    // If none of the value types for this register class are valid, we
3869
20.0M
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
3870
20.0M
    if (!isLegalRC(*RI, *RC))
3871
4.44M
      continue;
3872
15.5M
3873
15.5M
    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
3874
156M
         I != E; 
++I141M
) {
3875
141M
      if (RegName.equals_lower(RI->getRegAsmName(*I))) {
3876
590k
        std::pair<unsigned, const TargetRegisterClass *> S =
3877
590k
            std::make_pair(*I, RC);
3878
590k
3879
590k
        // If this register class has the requested value type, return it,
3880
590k
        // otherwise keep searching and return the first class found
3881
590k
        // if no other is found which explicitly has the requested type.
3882
590k
        if (RI->isTypeLegalForClass(*RC, VT))
3883
1.11k
          return S;
3884
588k
        if (!R.second)
3885
108k
          R = S;
3886
588k
      }
3887
141M
    }
3888
15.5M
  }
3889
175k
3890
175k
  
return R173k
;
3891
175k
}
3892
3893
//===----------------------------------------------------------------------===//
3894
// Constraint Selection.
3895
3896
/// Return true of this is an input operand that is a matching constraint like
3897
/// "4".
3898
200k
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
3899
200k
  assert(!ConstraintCode.empty() && "No known constraint!");
3900
200k
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
3901
200k
}
3902
3903
/// If this is an input matching constraint, this method returns the output
3904
/// operand it matches.
3905
1.30k
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
3906
1.30k
  assert(!ConstraintCode.empty() && "No known constraint!");
3907
1.30k
  return atoi(ConstraintCode.c_str());
3908
1.30k
}
3909
3910
/// Split up the constraint string from the inline assembly value into the
3911
/// specific constraints and their prefixes, and also tie in the associated
3912
/// operand values.
3913
/// If this returns an empty vector, and if the constraint string itself
3914
/// isn't empty, there was an error parsing.
3915
TargetLowering::AsmOperandInfoVector
3916
TargetLowering::ParseConstraints(const DataLayout &DL,
3917
                                 const TargetRegisterInfo *TRI,
3918
68.0k
                                 ImmutableCallSite CS) const {
3919
68.0k
  /// Information about all of the constraints.
3920
68.0k
  AsmOperandInfoVector ConstraintOperands;
3921
68.0k
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
3922
68.0k
  unsigned maCount = 0; // Largest number of multiple alternative constraints.
3923
68.0k
3924
68.0k
  // Do a prepass over the constraints, canonicalizing them, and building up the
3925
68.0k
  // ConstraintOperands list.
3926
68.0k
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
3927
68.0k
  unsigned ResNo = 0; // ResNo - The result number of the next output.
3928
68.0k
3929
335k
  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
3930
335k
    ConstraintOperands.emplace_back(std::move(CI));
3931
335k
    AsmOperandInfo &OpInfo = ConstraintOperands.back();
3932
335k
3933
335k
    // Update multiple alternative constraint count.
3934
335k
    if (OpInfo.multipleAlternatives.size() > maCount)
3935
417
      maCount = OpInfo.multipleAlternatives.size();
3936
335k
3937
335k
    OpInfo.ConstraintVT = MVT::Other;
3938
335k
3939
335k
    // Compute the value type for each operand.
3940
335k
    switch (OpInfo.Type) {
3941
335k
    case InlineAsm::isOutput:
3942
14.6k
      // Indirect outputs just consume an argument.
3943
14.6k
      if (OpInfo.isIndirect) {
3944
878
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3945
878
        break;
3946
878
      }
3947
13.7k
3948
13.7k
      // The return value of the call is this value.  As such, there is no
3949
13.7k
      // corresponding argument.
3950
13.7k
      assert(!CS.getType()->isVoidTy() &&
3951
13.7k
             "Bad inline asm!");
3952
13.7k
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
3953
2.45k
        OpInfo.ConstraintVT =
3954
2.45k
            getSimpleValueType(DL, STy->getElementType(ResNo));
3955
11.3k
      } else {
3956
11.3k
        assert(ResNo == 0 && "Asm only has one result!");
3957
11.3k
        OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
3958
11.3k
      }
3959
13.7k
      ++ResNo;
3960
13.7k
      break;
3961
17.6k
    case InlineAsm::isInput:
3962
17.6k
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3963
17.6k
      break;
3964
303k
    case InlineAsm::isClobber:
3965
303k
      // Nothing to do.
3966
303k
      break;
3967
335k
    }
3968
335k
3969
335k
    if (OpInfo.CallOperandVal) {
3970
18.5k
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
3971
18.5k
      if (OpInfo.isIndirect) {
3972
1.45k
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
3973
1.45k
        if (!PtrTy)
3974
0
          report_fatal_error("Indirect operand for inline asm not a pointer!");
3975
1.45k
        OpTy = PtrTy->getElementType();
3976
1.45k
      }
3977
18.5k
3978
18.5k
      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
3979
18.5k
      if (StructType *STy = dyn_cast<StructType>(OpTy))
3980
53
        if (STy->getNumElements() == 1)
3981
26
          OpTy = STy->getElementType(0);
3982
18.5k
3983
18.5k
      // If OpTy is not a single value, it may be a struct/union that we
3984
18.5k
      // can tile with integers.
3985
18.5k
      if (!OpTy->isSingleValueType() && 
OpTy->isSized()84
) {
3986
65
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
3987
65
        switch (BitSize) {
3988
65
        
default: break23
;
3989
65
        case 1:
3990
42
        case 8:
3991
42
        case 16:
3992
42
        case 32:
3993
42
        case 64:
3994
42
        case 128:
3995
42
          OpInfo.ConstraintVT =
3996
42
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
3997
42
          break;
3998
18.4k
        }
3999
18.4k
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4000
6.20k
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4001
6.20k
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4002
12.2k
      } else {
4003
12.2k
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4004
12.2k
      }
4005
18.5k
    }
4006
335k
  }
4007
68.0k
4008
68.0k
  // If we have multiple alternative constraints, select the best alternative.
4009
68.0k
  if (!ConstraintOperands.empty()) {
4010
66.1k
    if (maCount) {
4011
417
      unsigned bestMAIndex = 0;
4012
417
      int bestWeight = -1;
4013
417
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
4014
417
      int weight = -1;
4015
417
      unsigned maIndex;
4016
417
      // Compute the sums of the weights for each alternative, keeping track
4017
417
      // of the best (highest weight) one so far.
4018
1.30k
      for (maIndex = 0; maIndex < maCount; 
++maIndex888
) {
4019
888
        int weightSum = 0;
4020
888
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4021
3.21k
             cIndex != eIndex; 
++cIndex2.32k
) {
4022
2.50k
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4023
2.50k
          if (OpInfo.Type == InlineAsm::isClobber)
4024
825
            continue;
4025
1.67k
4026
1.67k
          // If this is an output operand with a matching input operand,
4027
1.67k
          // look up the matching input. If their types mismatch, e.g. one
4028
1.67k
          // is an integer, the other is floating point, or their sizes are
4029
1.67k
          // different, flag it as an maCantMatch.
4030
1.67k
          if (OpInfo.hasMatchingInput()) {
4031
0
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4032
0
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4033
0
              if ((OpInfo.ConstraintVT.isInteger() !=
4034
0
                   Input.ConstraintVT.isInteger()) ||
4035
0
                  (OpInfo.ConstraintVT.getSizeInBits() !=
4036
0
                   Input.ConstraintVT.getSizeInBits())) {
4037
0
                weightSum = -1; // Can't match.
4038
0
                break;
4039
0
              }
4040
1.67k
            }
4041
0
          }
4042
1.67k
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4043
1.67k
          if (weight == -1) {
4044
177
            weightSum = -1;
4045
177
            break;
4046
177
          }
4047
1.50k
          weightSum += weight;
4048
1.50k
        }
4049
888
        // Update best.
4050
888
        if (weightSum > bestWeight) {
4051
570
          bestWeight = weightSum;
4052
570
          bestMAIndex = maIndex;
4053
570
        }
4054
888
      }
4055
417
4056
417
      // Now select chosen alternative in each constraint.
4057
417
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4058
1.73k
           cIndex != eIndex; 
++cIndex1.32k
) {
4059
1.32k
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4060
1.32k
        if (cInfo.Type == InlineAsm::isClobber)
4061
489
          continue;
4062
831
        cInfo.selectAlternative(bestMAIndex);
4063
831
      }
4064
417
    }
4065
66.1k
  }
4066
68.0k
4067
68.0k
  // Check and hook up tied operands, choose constraint code to use.
4068
68.0k
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4069
403k
       cIndex != eIndex; 
++cIndex335k
) {
4070
335k
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4071
335k
4072
335k
    // If this is an output operand with a matching input operand, look up the
4073
335k
    // matching input. If their types mismatch, e.g. one is an integer, the
4074
335k
    // other is floating point, or their sizes are different, flag it as an
4075
335k
    // error.
4076
335k
    if (OpInfo.hasMatchingInput()) {
4077
1.35k
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4078
1.35k
4079
1.35k
      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4080
39
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4081
39
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4082
39
                                         OpInfo.ConstraintVT);
4083
39
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
4084
39
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4085
39
                                         Input.ConstraintVT);
4086
39
        if ((OpInfo.ConstraintVT.isInteger() !=
4087
39
             Input.ConstraintVT.isInteger()) ||
4088
39
            (MatchRC.second != InputRC.second)) {
4089
0
          report_fatal_error("Unsupported asm: input constraint"
4090
0
                             " with a matching output constraint of"
4091
0
                             " incompatible type!");
4092
0
        }
4093
39
      }
4094
1.35k
    }
4095
335k
  }
4096
68.0k
4097
68.0k
  return ConstraintOperands;
4098
68.0k
}
4099
4100
/// Return an integer indicating how general CT is.
4101
147k
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4102
147k
  switch (CT) {
4103
147k
  case TargetLowering::C_Other:
4104
1.91k
  case TargetLowering::C_Unknown:
4105
1.91k
    return 0;
4106
144k
  case TargetLowering::C_Register:
4107
144k
    return 1;
4108
1.91k
  case TargetLowering::C_RegisterClass:
4109
736
    return 2;
4110
1.91k
  case TargetLowering::C_Memory:
4111
411
    return 3;
4112
0
  }
4113
0
  llvm_unreachable("Invalid constraint type");
4114
0
}
4115
4116
/// Examine constraint type and operand type and determine a weight value.
4117
/// This object must already have been set up with the operand type
4118
/// and the current alternative constraint selected.
4119
TargetLowering::ConstraintWeight
4120
  TargetLowering::getMultipleConstraintMatchWeight(
4121
1.67k
    AsmOperandInfo &info, int maIndex) const {
4122
1.67k
  InlineAsm::ConstraintCodeVector *rCodes;
4123
1.67k
  if (maIndex >= (int)info.multipleAlternatives.size())
4124
3
    rCodes = &info.Codes;
4125
1.67k
  else
4126
1.67k
    rCodes = &info.multipleAlternatives[maIndex].Codes;
4127
1.67k
  ConstraintWeight BestWeight = CW_Invalid;
4128
1.67k
4129
1.67k
  // Loop over the options, keeping track of the most general one.
4130
3.56k
  for (unsigned i = 0, e = rCodes->size(); i != e; 
++i1.89k
) {
4131
1.89k
    ConstraintWeight weight =
4132
1.89k
      getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4133
1.89k
    if (weight > BestWeight)
4134
1.53k
      BestWeight = weight;
4135
1.89k
  }
4136
1.67k
4137
1.67k
  return BestWeight;
4138
1.67k
}
4139
4140
/// Examine constraint type and operand type and determine a weight value.
4141
/// This object must already have been set up with the operand type
4142
/// and the current alternative constraint selected.
4143
TargetLowering::ConstraintWeight
4144
  TargetLowering::getSingleConstraintMatchWeight(
4145
1.26k
    AsmOperandInfo &info, const char *constraint) const {
4146
1.26k
  ConstraintWeight weight = CW_Invalid;
4147
1.26k
  Value *CallOperandVal = info.CallOperandVal;
4148
1.26k
    // If we don't have a value, we can't do a match,
4149
1.26k
    // but allow it at the lowest weight.
4150
1.26k
  if (!CallOperandVal)
4151
90
    return CW_Default;
4152
1.17k
  // Look at the constraint type.
4153
1.17k
  switch (*constraint) {
4154
1.17k
    case 'i': // immediate integer.
4155
111
    case 'n': // immediate integer with a known value.
4156
111
      if (isa<ConstantInt>(CallOperandVal))
4157
69
        weight = CW_Constant;
4158
111
      break;
4159
111
    case 's': // non-explicit intregal immediate.
4160
0
      if (isa<GlobalValue>(CallOperandVal))
4161
0
        weight = CW_Constant;
4162
0
      break;
4163
111
    case 'E': // immediate float if host format.
4164
0
    case 'F': // immediate float.
4165
0
      if (isa<ConstantFP>(CallOperandVal))
4166
0
        weight = CW_Constant;
4167
0
      break;
4168
342
    case '<': // memory operand with autodecrement.
4169
342
    case '>': // memory operand with autoincrement.
4170
342
    case 'm': // memory operand.
4171
342
    case 'o': // offsettable memory operand
4172
342
    case 'V': // non-offsettable memory operand
4173
342
      weight = CW_Memory;
4174
342
      break;
4175
576
    case 'r': // general register.
4176
576
    case 'g': // general register, memory operand or immediate integer.
4177
576
              // note: Clang converts "g" to "imr".
4178
576
      if (CallOperandVal->getType()->isIntegerTy())
4179
420
        weight = CW_Register;
4180
576
      break;
4181
576
    case 'X': // any operand.
4182
147
  default:
4183
147
    weight = CW_Default;
4184
147
    break;
4185
1.17k
  }
4186
1.17k
  return weight;
4187
1.17k
}
4188
4189
/// If there are multiple different constraints that we could pick for this
4190
/// operand (e.g. "imr") try to pick the 'best' one.
4191
/// This is somewhat tricky: constraints fall into four classes:
4192
///    Other         -> immediates and magic values
4193
///    Register      -> one specific register
4194
///    RegisterClass -> a group of regs
4195
///    Memory        -> memory
4196
/// Ideally, we would pick the most specific constraint possible: if we have
4197
/// something that fits into a register, we would pick it.  The problem here
4198
/// is that if we have something that could either be in a register or in
4199
/// memory that use of the register could cause selection of *other*
4200
/// operands to fail: they might only succeed if we pick memory.  Because of
4201
/// this the heuristic we use is:
4202
///
4203
///  1) If there is an 'other' constraint, and if the operand is valid for
4204
///     that constraint, use it.  This makes us take advantage of 'i'
4205
///     constraints when available.
4206
///  2) Otherwise, pick the most general constraint present.  This prefers
4207
///     'm' over 'r', for example.
4208
///
4209
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4210
                             const TargetLowering &TLI,
4211
10.1k
                             SDValue Op, SelectionDAG *DAG) {
4212
10.1k
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4213
10.1k
  unsigned BestIdx = 0;
4214
10.1k
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4215
10.1k
  int BestGenerality = -1;
4216
10.1k
4217
10.1k
  // Loop over the options, keeping track of the most general one.
4218
157k
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; 
++i147k
) {
4219
147k
    TargetLowering::ConstraintType CType =
4220
147k
      TLI.getConstraintType(OpInfo.Codes[i]);
4221
147k
4222
147k
    // If this is an 'other' constraint, see if the operand is valid for it.
4223
147k
    // For example, on X86 we might have an 'rI' constraint.  If the operand
4224
147k
    // is an integer in the range [0..31] we want to use I (saving a load
4225
147k
    // of a register), otherwise we must use 'r'.
4226
147k
    if (CType == TargetLowering::C_Other && 
Op.getNode()751
) {
4227
232
      assert(OpInfo.Codes[i].size() == 1 &&
4228
232
             "Unhandled multi-letter 'other' constraint");
4229
232
      std::vector<SDValue> ResultOps;
4230
232
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4231
232
                                       ResultOps, *DAG);
4232
232
      if (!ResultOps.empty()) {
4233
72
        BestType = CType;
4234
72
        BestIdx = i;
4235
72
        break;
4236
72
      }
4237
147k
    }
4238
147k
4239
147k
    // Things with matching constraints can only be registers, per gcc
4240
147k
    // documentation.  This mainly affects "g" constraints.
4241
147k
    if (CType == TargetLowering::C_Memory && 
OpInfo.hasMatchingInput()444
)
4242
33
      continue;
4243
147k
4244
147k
    // This constraint letter is more general than the previous one, use it.
4245
147k
    int Generality = getConstraintGenerality(CType);
4246
147k
    if (Generality > BestGenerality) {
4247
10.8k
      BestType = CType;
4248
10.8k
      BestIdx = i;
4249
10.8k
      BestGenerality = Generality;
4250
10.8k
    }
4251
147k
  }
4252
10.1k
4253
10.1k
  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4254
10.1k
  OpInfo.ConstraintType = BestType;
4255
10.1k
}
4256
4257
/// Determines the constraint code and constraint type to use for the specific
4258
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4259
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4260
                                            SDValue Op,
4261
427k
                                            SelectionDAG *DAG) const {
4262
427k
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4263
427k
4264
427k
  // Single-letter constraints ('r') are very common.
4265
427k
  if (OpInfo.Codes.size() == 1) {
4266
417k
    OpInfo.ConstraintCode = OpInfo.Codes[0];
4267
417k
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4268
417k
  } else {
4269
10.1k
    ChooseConstraint(OpInfo, *this, Op, DAG);
4270
10.1k
  }
4271
427k
4272
427k
  // 'X' matches anything.
4273
427k
  if (OpInfo.ConstraintCode == "X" && 
OpInfo.CallOperandVal286
) {
4274
286
    // Labels and constants are handled elsewhere ('X' is the only thing
4275
286
    // that matches labels).  For Functions, the type here is the type of
4276
286
    // the result, which is not what we want to look at; leave them alone.
4277
286
    Value *v = OpInfo.CallOperandVal;
4278
286
    if (isa<BasicBlock>(v) || 
isa<ConstantInt>(v)280
||
isa<Function>(v)232
) {
4279
72
      OpInfo.CallOperandVal = v;
4280
72
      return;
4281
72
    }
4282
214
4283
214
    if (Op.getNode() && 
Op.getOpcode() == ISD::TargetBlockAddress74
)
4284
10
      return;
4285
204
4286
204
    // Otherwise, try to resolve it to something we know about by looking at
4287
204
    // the actual operand type.
4288
204
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4289
201
      OpInfo.ConstraintCode = Repl;
4290
201
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4291
201
    }
4292
204
  }
4293
427k
}
4294
4295
/// Given an exact SDIV by a constant, create a multiplication
4296
/// with the multiplicative inverse of the constant.
4297
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
4298
                              const SDLoc &dl, SelectionDAG &DAG,
4299
813
                              SmallVectorImpl<SDNode *> &Created) {
4300
813
  SDValue Op0 = N->getOperand(0);
4301
813
  SDValue Op1 = N->getOperand(1);
4302
813
  EVT VT = N->getValueType(0);
4303
813
  EVT SVT = VT.getScalarType();
4304
813
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
4305
813
  EVT ShSVT = ShVT.getScalarType();
4306
813
4307
813
  bool UseSRA = false;
4308
813
  SmallVector<SDValue, 16> Shifts, Factors;
4309
813
4310
849
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4311
849
    if (C->isNullValue())
4312
0
      return false;
4313
849
    APInt Divisor = C->getAPIntValue();
4314
849
    unsigned Shift = Divisor.countTrailingZeros();
4315
849
    if (Shift) {
4316
823
      Divisor.ashrInPlace(Shift);
4317
823
      UseSRA = true;
4318
823
    }
4319
849
    // Calculate the multiplicative inverse, using Newton's method.
4320
849
    APInt t;
4321
849
    APInt Factor = Divisor;
4322
4.97k
    while ((t = Divisor * Factor) != 1)
4323
4.12k
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
4324
849
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
4325
849
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
4326
849
    return true;
4327
849
  };
4328
813
4329
813
  // Collect all magic values from the build vector.
4330
813
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
4331
0
    return SDValue();
4332
813
4333
813
  SDValue Shift, Factor;
4334
813
  if (VT.isVector()) {
4335
12
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4336
12
    Factor = DAG.getBuildVector(VT, dl, Factors);
4337
801
  } else {
4338
801
    Shift = Shifts[0];
4339
801
    Factor = Factors[0];
4340
801
  }
4341
813
4342
813
  SDValue Res = Op0;
4343
813
4344
813
  // Shift the value upfront if it is even, so the LSB is one.
4345
813
  if (UseSRA) {
4346
807
    // TODO: For UDIV use SRL instead of SRA.
4347
807
    SDNodeFlags Flags;
4348
807
    Flags.setExact(true);
4349
807
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
4350
807
    Created.push_back(Res.getNode());
4351
807
  }
4352
813
4353
813
  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
4354
813
}
4355
4356
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4357
                              SelectionDAG &DAG,
4358
319
                              SmallVectorImpl<SDNode *> &Created) const {
4359
319
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4360
319
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4361
319
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4362
14
    return SDValue(N, 0); // Lower SDIV as SDIV
4363
305
  return SDValue();
4364
305
}
4365
4366
/// Given an ISD::SDIV node expressing a divide by constant,
4367
/// return a DAG expression to select that will generate the same value by
4368
/// multiplying by a magic number.
4369
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4370
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
4371
                                  bool IsAfterLegalization,
4372
1.86k
                                  SmallVectorImpl<SDNode *> &Created) const {
4373
1.86k
  SDLoc dl(N);
4374
1.86k
  EVT VT = N->getValueType(0);
4375
1.86k
  EVT SVT = VT.getScalarType();
4376
1.86k
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4377
1.86k
  EVT ShSVT = ShVT.getScalarType();
4378
1.86k
  unsigned EltBits = VT.getScalarSizeInBits();
4379
1.86k
4380
1.86k
  // Check to see if we can do this.
4381
1.86k
  // FIXME: We should be more aggressive here.
4382
1.86k
  if (!isTypeLegal(VT))
4383
132
    return SDValue();
4384
1.73k
4385
1.73k
  // If the sdiv has an 'exact' bit we can use a simpler lowering.
4386
1.73k
  if (N->getFlags().hasExact())
4387
813
    return BuildExactSDIV(*this, N, dl, DAG, Created);
4388
924
4389
924
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
4390
924
4391
3.79k
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4392
3.79k
    if (C->isNullValue())
4393
0
      return false;
4394
3.79k
4395
3.79k
    const APInt &Divisor = C->getAPIntValue();
4396
3.79k
    APInt::ms magics = Divisor.magic();
4397
3.79k
    int NumeratorFactor = 0;
4398
3.79k
    int ShiftMask = -1;
4399
3.79k
4400
3.79k
    if (Divisor.isOneValue() || 
Divisor.isAllOnesValue()3.61k
) {
4401
269
      // If d is +1/-1, we just multiply the numerator by +1/-1.
4402
269
      NumeratorFactor = Divisor.getSExtValue();
4403
269
      magics.m = 0;
4404
269
      magics.s = 0;
4405
269
      ShiftMask = 0;
4406
3.52k
    } else if (Divisor.isStrictlyPositive() && 
magics.m.isNegative()3.41k
) {
4407
1.48k
      // If d > 0 and m < 0, add the numerator.
4408
1.48k
      NumeratorFactor = 1;
4409
2.04k
    } else if (Divisor.isNegative() && 
magics.m.isStrictlyPositive()115
) {
4410
104
      // If d < 0 and m > 0, subtract the numerator.
4411
104
      NumeratorFactor = -1;
4412
104
    }
4413
3.79k
4414
3.79k
    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
4415
3.79k
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
4416
3.79k
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
4417
3.79k
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
4418
3.79k
    return true;
4419
3.79k
  };
4420
924
4421
924
  SDValue N0 = N->getOperand(0);
4422
924
  SDValue N1 = N->getOperand(1);
4423
924
4424
924
  // Collect the shifts / magic values from each element.
4425
924
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
4426
0
    return SDValue();
4427
924
4428
924
  SDValue MagicFactor, Factor, Shift, ShiftMask;
4429
924
  if (VT.isVector()) {
4430
392
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4431
392
    Factor = DAG.getBuildVector(VT, dl, Factors);
4432
392
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4433
392
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
4434
532
  } else {
4435
532
    MagicFactor = MagicFactors[0];
4436
532
    Factor = Factors[0];
4437
532
    Shift = Shifts[0];
4438
532
    ShiftMask = ShiftMasks[0];
4439
532
  }
4440
924
4441
924
  // Multiply the numerator (operand 0) by the magic value.
4442
924
  // FIXME: We should support doing a MUL in a wider type.
4443
924
  SDValue Q;
4444
924
  if (IsAfterLegalization ? 
isOperationLegal(ISD::MULHS, VT)81
4445
924
                          : 
isOperationLegalOrCustom(ISD::MULHS, VT)843
)
4446
541
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
4447
383
  else if (IsAfterLegalization ? 
isOperationLegal(ISD::SMUL_LOHI, VT)77
4448
383
                               : 
isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)306
) {
4449
339
    SDValue LoHi =
4450
339
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
4451
339
    Q = SDValue(LoHi.getNode(), 1);
4452
339
  } else
4453
44
    return SDValue(); // No mulhs or equivalent.
4454
880
  Created.push_back(Q.getNode());
4455
880
4456
880
  // (Optionally) Add/subtract the numerator using Factor.
4457
880
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
4458
880
  Created.push_back(Factor.getNode());
4459
880
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
4460
880
  Created.push_back(Q.getNode());
4461
880
4462
880
  // Shift right algebraic by shift value.
4463
880
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
4464
880
  Created.push_back(Q.getNode());
4465
880
4466
880
  // Extract the sign bit, mask it and add it to the quotient.
4467
880
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
4468
880
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
4469
880
  Created.push_back(T.getNode());
4470
880
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
4471
880
  Created.push_back(T.getNode());
4472
880
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
4473
880
}
4474
4475
/// Given an ISD::UDIV node expressing a divide by constant,
4476
/// return a DAG expression to select that will generate the same value by
4477
/// multiplying by a magic number.
4478
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4479
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
4480
                                  bool IsAfterLegalization,
4481
1.80k
                                  SmallVectorImpl<SDNode *> &Created) const {
4482
1.80k
  SDLoc dl(N);
4483
1.80k
  EVT VT = N->getValueType(0);
4484
1.80k
  EVT SVT = VT.getScalarType();
4485
1.80k
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4486
1.80k
  EVT ShSVT = ShVT.getScalarType();
4487
1.80k
  unsigned EltBits = VT.getScalarSizeInBits();
4488
1.80k
4489
1.80k
  // Check to see if we can do this.
4490
1.80k
  // FIXME: We should be more aggressive here.
4491
1.80k
  if (!isTypeLegal(VT))
4492
149
    return SDValue();
4493
1.65k
4494
1.65k
  bool UseNPQ = false;
4495
1.65k
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4496
1.65k
4497
4.25k
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
4498
4.25k
    if (C->isNullValue())
4499
0
      return false;
4500
4.25k
    // FIXME: We should use a narrower constant when the upper
4501
4.25k
    // bits are known to be zero.
4502
4.25k
    APInt Divisor = C->getAPIntValue();
4503
4.25k
    APInt::mu magics = Divisor.magicu();
4504
4.25k
    unsigned PreShift = 0, PostShift = 0;
4505
4.25k
4506
4.25k
    // If the divisor is even, we can avoid using the expensive fixup by
4507
4.25k
    // shifting the divided value upfront.
4508
4.25k
    if (magics.a != 0 && 
!Divisor[0]2.07k
) {
4509
251
      PreShift = Divisor.countTrailingZeros();
4510
251
      // Get magic number for the shifted divisor.
4511
251
      magics = Divisor.lshr(PreShift).magicu(PreShift);
4512
251
      assert(magics.a == 0 && "Should use cheap fixup now");
4513
251
    }
4514
4.25k
4515
4.25k
    APInt Magic = magics.m;
4516
4.25k
4517
4.25k
    unsigned SelNPQ;
4518
4.25k
    if (magics.a == 0 || 
Divisor.isOneValue()1.82k
) {
4519
2.55k
      assert(magics.s < Divisor.getBitWidth() &&
4520
2.55k
             "We shouldn't generate an undefined shift!");
4521
2.55k
      PostShift = magics.s;
4522
2.55k
      SelNPQ = false;
4523
2.55k
    } else {
4524
1.69k
      PostShift = magics.s - 1;
4525
1.69k
      SelNPQ = true;
4526
1.69k
    }
4527
4.25k
4528
4.25k
    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
4529
4.25k
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
4530
4.25k
    NPQFactors.push_back(
4531
4.25k
        DAG.getConstant(SelNPQ ? 
APInt::getOneBitSet(EltBits, EltBits - 1)1.69k
4532
4.25k
                               : 
APInt::getNullValue(EltBits)2.55k
,
4533
4.25k
                        dl, SVT));
4534
4.25k
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
4535
4.25k
    UseNPQ |= SelNPQ;
4536
4.25k
    return true;
4537
4.25k
  };
4538
1.65k
4539
1.65k
  SDValue N0 = N->getOperand(0);
4540
1.65k
  SDValue N1 = N->getOperand(1);
4541
1.65k
4542
1.65k
  // Collect the shifts/magic values from each element.
4543
1.65k
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
4544
0
    return SDValue();
4545
1.65k
4546
1.65k
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
4547
1.65k
  if (VT.isVector()) {
4548
304
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
4549
304
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4550
304
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
4551
304
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
4552
1.35k
  } else {
4553
1.35k
    PreShift = PreShifts[0];
4554
1.35k
    MagicFactor = MagicFactors[0];
4555
1.35k
    PostShift = PostShifts[0];
4556
1.35k
  }
4557
1.65k
4558
1.65k
  SDValue Q = N0;
4559
1.65k
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
4560
1.65k
  Created.push_back(Q.getNode());
4561
1.65k
4562
1.65k
  // FIXME: We should support doing a MUL in a wider type.
4563
1.77k
  auto GetMULHU = [&](SDValue X, SDValue Y) {
4564
1.77k
    if (IsAfterLegalization ? 
isOperationLegal(ISD::MULHU, VT)113
4565
1.77k
                            : 
isOperationLegalOrCustom(ISD::MULHU, VT)1.66k
)
4566
761
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
4567
1.01k
    if (IsAfterLegalization ? 
isOperationLegal(ISD::UMUL_LOHI, VT)93
4568
1.01k
                            : 
isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)919
) {
4569
948
      SDValue LoHi =
4570
948
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
4571
948
      return SDValue(LoHi.getNode(), 1);
4572
948
    }
4573
64
    return SDValue(); // No mulhu or equivalent
4574
64
  };
4575
1.65k
4576
1.65k
  // Multiply the numerator (operand 0) by the magic value.
4577
1.65k
  Q = GetMULHU(Q, MagicFactor);
4578
1.65k
  if (!Q)
4579
64
    return SDValue();
4580
1.59k
4581
1.59k
  Created.push_back(Q.getNode());
4582
1.59k
4583
1.59k
  if (UseNPQ) {
4584
277
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
4585
277
    Created.push_back(NPQ.getNode());
4586
277
4587
277
    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4588
277
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
4589
277
    if (VT.isVector())
4590
115
      NPQ = GetMULHU(NPQ, NPQFactor);
4591
162
    else
4592
162
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
4593
277
4594
277
    Created.push_back(NPQ.getNode());
4595
277
4596
277
    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
4597
277
    Created.push_back(Q.getNode());
4598
277
  }
4599
1.59k
4600
1.59k
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
4601
1.59k
  Created.push_back(Q.getNode());
4602
1.59k
4603
1.59k
  SDValue One = DAG.getConstant(1, dl, VT);
4604
1.59k
  SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
4605
1.59k
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
4606
1.59k
}
4607
4608
/// If all values in Values that *don't* match the predicate are same 'splat'
4609
/// value, then replace all values with that splat value.
4610
/// Else, if AlternativeReplacement was provided, then replace all values that
4611
/// do match predicate with AlternativeReplacement value.
4612
static void
4613
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4614
                          std::function<bool(SDValue)> Predicate,
4615
132
                          SDValue AlternativeReplacement = SDValue()) {
4616
132
  SDValue Replacement;
4617
132
  // Is there a value for which the Predicate does *NOT* match? What is it?
4618
132
  auto SplatValue = llvm::find_if_not(Values, Predicate);
4619
132
  if (SplatValue != Values.end()) {
4620
132
    // Does Values consist only of SplatValue's and values matching Predicate?
4621
342
    if (
llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) 132
{
4622
342
          return Value == *SplatValue || 
Predicate(Value)138
;
4623
342
        })) // Then we shall replace values matching predicate with SplatValue.
4624
30
      Replacement = *SplatValue;
4625
132
  }
4626
132
  if (!Replacement) {
4627
102
    // Oops, we did not find the "baseline" splat value.
4628
102
    if (!AlternativeReplacement)
4629
54
      return; // Nothing to do.
4630
48
    // Let's replace with provided value then.
4631
48
    Replacement = AlternativeReplacement;
4632
48
  }
4633
132
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4634
78
}
4635
4636
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4637
/// where the divisor is constant and the comparison target is zero,
4638
/// return a DAG expression that will generate the same comparison result
4639
/// using only multiplications, additions and shifts/rotations.
4640
/// Ref: "Hacker's Delight" 10-17.
4641
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4642
                                        SDValue CompTargetNode,
4643
                                        ISD::CondCode Cond,
4644
                                        DAGCombinerInfo &DCI,
4645
337
                                        const SDLoc &DL) const {
4646
337
  SmallVector<SDNode *, 2> Built;
4647
337
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4648
124
                                         DCI, DL, Built)) {
4649
124
    for (SDNode *N : Built)
4650
181
      DCI.AddToWorklist(N);
4651
124
    return Folded;
4652
124
  }
4653
213
4654
213
  return SDValue();
4655
213
}
4656
4657
SDValue
4658
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4659
                                  SDValue CompTargetNode, ISD::CondCode Cond,
4660
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
4661
337
                                  SmallVectorImpl<SDNode *> &Created) const {
4662
337
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
4663
337
  // - D must be constant, with D = D0 * 2^K where D0 is odd
4664
337
  // - P is the multiplicative inverse of D0 modulo 2^W
4665
337
  // - Q = floor((2^W - 1) / D0)
4666
337
  // where W is the width of the common type of N and D.
4667
337
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4668
337
         "Only applicable for (in)equality comparisons.");
4669
337
4670
337
  SelectionDAG &DAG = DCI.DAG;
4671
337
4672
337
  EVT VT = REMNode.getValueType();
4673
337
  EVT SVT = VT.getScalarType();
4674
337
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4675
337
  EVT ShSVT = ShVT.getScalarType();
4676
337
4677
337
  // If MUL is unavailable, we cannot proceed in any case.
4678
337
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
4679
3
    return SDValue();
4680
334
4681
334
  // TODO: Could support comparing with non-zero too.
4682
334
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
4683
334
  if (!CompTarget || 
!CompTarget->isNullValue()320
)
4684
14
    return SDValue();
4685
320
4686
320
  bool HadOneDivisor = false;
4687
320
  bool AllDivisorsAreOnes = true;
4688
320
  bool HadEvenDivisor = false;
4689
320
  bool AllDivisorsArePowerOfTwo = true;
4690
320
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
4691
320
4692
782
  auto BuildUREMPattern = [&](ConstantSDNode *C) {
4693
782
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
4694
782
    if (C->isNullValue())
4695
0
      return false;
4696
782
4697
782
    const APInt &D = C->getAPIntValue();
4698
782
    // If all divisors are ones, we will prefer to avoid the fold.
4699
782
    HadOneDivisor |= D.isOneValue();
4700
782
    AllDivisorsAreOnes &= D.isOneValue();
4701
782
4702
782
    // Decompose D into D0 * 2^K
4703
782
    unsigned K = D.countTrailingZeros();
4704
782
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
4705
782
    APInt D0 = D.lshr(K);
4706
782
4707
782
    // D is even if it has trailing zeros.
4708
782
    HadEvenDivisor |= (K != 0);
4709
782
    // D is a power-of-two if D0 is one.
4710
782
    // If all divisors are power-of-two, we will prefer to avoid the fold.
4711
782
    AllDivisorsArePowerOfTwo &= D0.isOneValue();
4712
782
4713
782
    // P = inv(D0, 2^W)
4714
782
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
4715
782
    unsigned W = D.getBitWidth();
4716
782
    APInt P = D0.zext(W + 1)
4717
782
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
4718
782
                  .trunc(W);
4719
782
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
4720
782
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
4721
782
4722
782
    // Q = floor((2^W - 1) / D)
4723
782
    APInt Q = APInt::getAllOnesValue(W).udiv(D);
4724
782
4725
782
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
4726
782
           "We are expecting that K is always less than all-ones for ShSVT");
4727
782
4728
782
    // If the divisor is 1 the result can be constant-folded.
4729
782
    if (D.isOneValue()) {
4730
93
      // Set P and K amount to a bogus values so we can try to splat them.
4731
93
      P = 0;
4732
93
      K = -1;
4733
93
      assert(Q.isAllOnesValue() &&
4734
93
             "Expecting all-ones comparison for one divisor");
4735
93
    }
4736
782
4737
782
    PAmts.push_back(DAG.getConstant(P, DL, SVT));
4738
782
    KAmts.push_back(
4739
782
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
4740
782
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
4741
782
    return true;
4742
782
  };
4743
320
4744
320
  SDValue N = REMNode.getOperand(0);
4745
320
  SDValue D = REMNode.getOperand(1);
4746
320
4747
320
  // Collect the values from each element.
4748
320
  if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
4749
69
    return SDValue();
4750
251
4751
251
  // If this is a urem by a one, avoid the fold since it can be constant-folded.
4752
251
  if (AllDivisorsAreOnes)
4753
9
    return SDValue();
4754
242
4755
242
  // If this is a urem by a powers-of-two, avoid the fold since it can be
4756
242
  // best implemented as a bit test.
4757
242
  if (AllDivisorsArePowerOfTwo)
4758
10
    return SDValue();
4759
232
4760
232
  SDValue PVal, KVal, QVal;
4761
232
  if (VT.isVector()) {
4762
165
    if (HadOneDivisor) {
4763
66
      // Try to turn PAmts into a splat, since we don't care about the values
4764
66
      // that are currently '0'. If we can't, just keep '0'`s.
4765
66
      turnVectorIntoSplatVector(PAmts, isNullConstant);
4766
66
      // Try to turn KAmts into a splat, since we don't care about the values
4767
66
      // that are currently '-1'. If we can't, change them to '0'`s.
4768
66
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
4769
66
                                DAG.getConstant(0, DL, ShSVT));
4770
66
    }
4771
165
4772
165
    PVal = DAG.getBuildVector(VT, DL, PAmts);
4773
165
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
4774
165
    QVal = DAG.getBuildVector(VT, DL, QAmts);
4775
165
  } else {
4776
67
    PVal = PAmts[0];
4777
67
    KVal = KAmts[0];
4778
67
    QVal = QAmts[0];
4779
67
  }
4780
232
4781
232
  // (mul N, P)
4782
232
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
4783
232
  Created.push_back(Op0.getNode());
4784
232
4785
232
  // Rotate right only if any divisor was even. We avoid rotates for all-odd
4786
232
  // divisors as a performance improvement, since rotating by 0 is a no-op.
4787
232
  if (HadEvenDivisor) {
4788
165
    // We need ROTR to do this.
4789
165
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
4790
108
      return SDValue();
4791
57
    SDNodeFlags Flags;
4792
57
    Flags.setExact(true);
4793
57
    // UREM: (rotr (mul N, P), K)
4794
57
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
4795
57
    Created.push_back(Op0.getNode());
4796
57
  }
4797
232
4798
232
  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
4799
232
  return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
4800
124
                      ((Cond == ISD::SETEQ) ? 
ISD::SETULE86
:
ISD::SETUGT38
));
4801
232
}
4802
4803
bool TargetLowering::
4804
4.90k
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
4805
4.90k
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4806
0
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
4807
0
                                "be a constant integer");
4808
0
    return true;
4809
0
  }
4810
4.90k
4811
4.90k
  return false;
4812
4.90k
}
4813
4814
//===----------------------------------------------------------------------===//
4815
// Legalization Utilities
4816
//===----------------------------------------------------------------------===//
4817
4818
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
4819
                                    SDValue LHS, SDValue RHS,
4820
                                    SmallVectorImpl<SDValue> &Result,
4821
                                    EVT HiLoVT, SelectionDAG &DAG,
4822
                                    MulExpansionKind Kind, SDValue LL,
4823
3.63k
                                    SDValue LH, SDValue RL, SDValue RH) const {
4824
3.63k
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
4825
3.63k
         Opcode == ISD::SMUL_LOHI);
4826
3.63k
4827
3.63k
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
4828
3.63k
                  
isOperationLegalOrCustom(ISD::MULHS, HiLoVT)3.49k
;
4829
3.63k
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
4830
3.63k
                  
isOperationLegalOrCustom(ISD::MULHU, HiLoVT)3.49k
;
4831
3.63k
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
4832
3.63k
                      
isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT)3.49k
;
4833
3.63k
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
4834
3.63k
                      
isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT)3.49k
;
4835
3.63k
4836
3.63k
  if (!HasMULHU && 
!HasMULHS2.71k
&&
!HasUMUL_LOHI2.13k
&&
!HasSMUL_LOHI512
)
4837
512
    return false;
4838
3.12k
4839
3.12k
  unsigned OuterBitSize = VT.getScalarSizeInBits();
4840
3.12k
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
4841
3.12k
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
4842
3.12k
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
4843
3.12k
4844
3.12k
  // LL, LH, RL, and RH must be either all NULL or all set to a value.
4845
3.12k
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
4846
3.12k
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
4847
3.12k
4848
3.12k
  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
4849
3.12k
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
4850
3.56k
                          bool Signed) -> bool {
4851
3.56k
    if ((Signed && 
HasSMUL_LOHI230
) ||
(3.37k
!Signed3.37k
&&
HasUMUL_LOHI3.33k
)) {
4852
2.80k
      Lo = DAG.getNode(Signed ? 
ISD::SMUL_LOHI190
:
ISD::UMUL_LOHI2.61k
, dl, VTs, L, R);
4853
2.80k
      Hi = SDValue(Lo.getNode(), 1);
4854
2.80k
      return true;
4855
2.80k
    }
4856
763
    if ((Signed && 
HasMULHS40
) ||
(723
!Signed723
&&
HasMULHU723
)) {
4857
763
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
4858
763
      Hi = DAG.getNode(Signed ? 
ISD::MULHS40
:
ISD::MULHU723
, dl, HiLoVT, L, R);
4859
763
      return true;
4860
763
    }
4861
0
    return false;
4862
0
  };
4863
3.12k
4864
3.12k
  SDValue Lo, Hi;
4865
3.12k
4866
3.12k
  if (!LL.getNode() && 
!RL.getNode()809
&&
4867
3.12k
      
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)809
) {
4868
809
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
4869
809
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
4870
809
  }
4871
3.12k
4872
3.12k
  if (!LL.getNode())
4873
0
    return false;
4874
3.12k
4875
3.12k
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
4876
3.12k
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
4877
3.12k
      
DAG.MaskedValueIsZero(RHS, HighMask)2.11k
) {
4878
2.10k
    // The inputs are both zero-extended.
4879
2.10k
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
4880
2.10k
      Result.push_back(Lo);
4881
2.10k
      Result.push_back(Hi);
4882
2.10k
      if (Opcode != ISD::MUL) {
4883
0
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
4884
0
        Result.push_back(Zero);
4885
0
        Result.push_back(Zero);
4886
0
      }
4887
2.10k
      return true;
4888
2.10k
    }
4889
1.02k
  }
4890
1.02k
4891
1.02k
  if (!VT.isVector() && Opcode == ISD::MUL && 
LHSSB > InnerBitSize873
&&
4892
1.02k
      
RHSSB > InnerBitSize225
) {
4893
223
    // The input values are both sign-extended.
4894
223
    // TODO non-MUL case?
4895
223
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
4896
223
      Result.push_back(Lo);
4897
223
      Result.push_back(Hi);
4898
223
      return true;
4899
223
    }
4900
799
  }
4901
799
4902
799
  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
4903
799
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
4904
799
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
4905
0
    // FIXME getShiftAmountTy does not always return a sensible result when VT
4906
0
    // is an illegal type, and so the type may be too small to fit the shift
4907
0
    // amount. Override it with i32. The shift will have to be legalized.
4908
0
    ShiftAmountTy = MVT::i32;
4909
0
  }
4910
799
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
4911
799
4912
799
  if (!LH.getNode() && 
!RH.getNode()334
&&
4913
799
      
isOperationLegalOrCustom(ISD::SRL, VT)334
&&
4914
799
      
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)334
) {
4915
334
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
4916
334
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
4917
334
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
4918
334
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
4919
334
  }
4920
799
4921
799
  if (!LH.getNode())