Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Line | Count | Source
1
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11
// both before and after the DAG is legalized.
12
//
13
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14
// primarily intended to handle simplification opportunities that are implicit
15
// in the LLVM IR and exposed by the various codegen lowering phases.
16
//
17
//===----------------------------------------------------------------------===//
18
19
#include "llvm/ADT/APFloat.h"
20
#include "llvm/ADT/APInt.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/ADT/DenseMap.h"
23
#include "llvm/ADT/None.h"
24
#include "llvm/ADT/Optional.h"
25
#include "llvm/ADT/STLExtras.h"
26
#include "llvm/ADT/SetVector.h"
27
#include "llvm/ADT/SmallBitVector.h"
28
#include "llvm/ADT/SmallPtrSet.h"
29
#include "llvm/ADT/SmallSet.h"
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/AliasAnalysis.h"
33
#include "llvm/Analysis/MemoryLocation.h"
34
#include "llvm/CodeGen/DAGCombine.h"
35
#include "llvm/CodeGen/ISDOpcodes.h"
36
#include "llvm/CodeGen/MachineFrameInfo.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineMemOperand.h"
39
#include "llvm/CodeGen/MachineValueType.h"
40
#include "llvm/CodeGen/RuntimeLibcalls.h"
41
#include "llvm/CodeGen/SelectionDAG.h"
42
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43
#include "llvm/CodeGen/SelectionDAGNodes.h"
44
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45
#include "llvm/CodeGen/ValueTypes.h"
46
#include "llvm/IR/Attributes.h"
47
#include "llvm/IR/Constant.h"
48
#include "llvm/IR/DataLayout.h"
49
#include "llvm/IR/DerivedTypes.h"
50
#include "llvm/IR/Function.h"
51
#include "llvm/IR/LLVMContext.h"
52
#include "llvm/IR/Metadata.h"
53
#include "llvm/Support/Casting.h"
54
#include "llvm/Support/CodeGen.h"
55
#include "llvm/Support/CommandLine.h"
56
#include "llvm/Support/Compiler.h"
57
#include "llvm/Support/Debug.h"
58
#include "llvm/Support/ErrorHandling.h"
59
#include "llvm/Support/KnownBits.h"
60
#include "llvm/Support/MathExtras.h"
61
#include "llvm/Support/raw_ostream.h"
62
#include "llvm/Target/TargetLowering.h"
63
#include "llvm/Target/TargetMachine.h"
64
#include "llvm/Target/TargetOptions.h"
65
#include "llvm/Target/TargetRegisterInfo.h"
66
#include "llvm/Target/TargetSubtargetInfo.h"
67
#include <algorithm>
68
#include <cassert>
69
#include <cstdint>
70
#include <functional>
71
#include <iterator>
72
#include <string>
73
#include <tuple>
74
#include <utility>
75
#include <vector>
76
77
using namespace llvm;
78
79
#define DEBUG_TYPE "dagcombine"
80
81
STATISTIC(NodesCombined   , "Number of dag nodes combined");
82
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
85
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
86
STATISTIC(SlicedLoads, "Number of load sliced");
87
88
static cl::opt<bool>
89
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92
static cl::opt<bool>
93
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94
        cl::desc("Enable DAG combiner's use of TBAA"));
95
96
#ifndef NDEBUG
97
static cl::opt<std::string>
98
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99
                   cl::desc("Only use DAG-combiner alias analysis in this"
100
                            " function"));
101
#endif
102
103
/// Hidden option to stress test load slicing, i.e., when this option
104
/// is enabled, load slicing bypasses most of its profitability guards.
105
static cl::opt<bool>
106
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107
                  cl::desc("Bypass the profitability model of load slicing"),
108
                  cl::init(false));
109
110
static cl::opt<bool>
111
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112
                    cl::desc("DAG combiner may split indexing from loads"));
113
114
namespace {
115
116
  class DAGCombiner {
117
    SelectionDAG &DAG;
118
    const TargetLowering &TLI;
119
    CombineLevel Level;
120
    CodeGenOpt::Level OptLevel;
121
    bool LegalOperations = false;
122
    bool LegalTypes = false;
123
    bool ForCodeSize;
124
125
    /// \brief Worklist of all of the nodes that need to be simplified.
126
    ///
127
    /// This must behave as a stack -- new nodes to process are pushed onto the
128
    /// back and when processing we pop off of the back.
129
    ///
130
    /// The worklist will not contain duplicates but may contain null entries
131
    /// due to nodes being deleted from the underlying DAG.
132
    SmallVector<SDNode *, 64> Worklist;
133
134
    /// \brief Mapping from an SDNode to its position on the worklist.
135
    ///
136
    /// This is used to find and remove nodes from the worklist (by nulling
137
    /// them) when they are deleted from the underlying DAG. It relies on
138
    /// stable indices of nodes within the worklist.
139
    DenseMap<SDNode *, unsigned> WorklistMap;
140
141
    /// \brief Set of nodes which have been combined (at least once).
142
    ///
143
    /// This is used to allow us to reliably add any operands of a DAG node
144
    /// which have not yet been combined to the worklist.
145
    SmallPtrSet<SDNode *, 32> CombinedNodes;
146
147
    // AA - Used for DAG load/store alias analysis.
148
    AliasAnalysis *AA;
149
150
    /// When an instruction is simplified, add all users of the instruction to
151
    /// the work lists because they might get more simplified now.
152
7.28M
    void AddUsersToWorklist(SDNode *N) {
153
7.28M
      for (SDNode *Node : N->uses())
154
11.1M
        AddToWorklist(Node);
155
7.28M
    }
156
157
    /// Call the node-specific routine that folds each particular type of node.
158
    SDValue visit(SDNode *N);
159
160
  public:
161
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163
7.42M
          OptLevel(OL), AA(AA) {
164
7.42M
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
165
7.42M
166
7.42M
      MaximumLegalStoreInBits = 0;
167
7.42M
      for (MVT VT : MVT::all_valuetypes())
168
824M
        
        if (EVT(VT).isSimple() && VT != MVT::Other &&
169
816M
            TLI.isTypeLegal(EVT(VT)) &&
170
145M
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
171
81.2M
          MaximumLegalStoreInBits = VT.getSizeInBits();
172
7.42M
    }
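Editor's note: the constructor above computes MaximumLegalStoreInBits by scanning every simple value type and keeping the widest one the target can store legally. A minimal standalone sketch of that scan, assuming a caller-supplied IsTypeLegal predicate and a plain list of candidate widths standing in for TLI and MVT::all_valuetypes():

#include <vector>

// Keep the widest width that the (hypothetical) IsTypeLegal callback accepts.
static unsigned maxLegalStoreBits(const std::vector<unsigned> &CandidateBits,
                                  bool (*IsTypeLegal)(unsigned Bits)) {
  unsigned Max = 0;
  for (unsigned Bits : CandidateBits)
    if (IsTypeLegal(Bits) && Bits >= Max)
      Max = Bits;
  return Max;
}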
173
174
    /// Add to the worklist making sure its instance is at the back (next to be
175
    /// processed).
176
496M
    void AddToWorklist(SDNode *N) {
177
496M
      assert(N->getOpcode() != ISD::DELETED_NODE &&
178
496M
             "Deleted Node added to Worklist");
179
496M
180
496M
      // Skip handle nodes as they can't usefully be combined and confuse the
181
496M
      // zero-use deletion strategy.
182
496M
      if (N->getOpcode() == ISD::HANDLENODE)
183
48.5k
        return;
184
496M
185
496M
      
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186
195M
        Worklist.push_back(N);
187
496M
    }
188
189
    /// Remove all instances of N from the worklist.
190
23.1M
    void removeFromWorklist(SDNode *N) {
191
23.1M
      CombinedNodes.erase(N);
192
23.1M
193
23.1M
      auto It = WorklistMap.find(N);
194
23.1M
      if (It == WorklistMap.end())
195
13.5M
        return; // Not in the worklist.
196
9.58M
197
9.58M
      // Null out the entry rather than erasing it to avoid a linear operation.
198
9.58M
      Worklist[It->second] = nullptr;
199
9.58M
      WorklistMap.erase(It);
200
9.58M
    }
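Editor's note: AddToWorklist/removeFromWorklist above implement a stack plus a node-to-index map, where removal nulls out the vector slot instead of erasing it so the indices of the remaining entries stay stable. A rough sketch of that pattern with standard containers, using int* in place of SDNode* purely for illustration:

#include <cstddef>
#include <unordered_map>
#include <vector>

struct Worklist {
  std::vector<int *> Stack;                      // null entries mark deletions
  std::unordered_map<int *, std::size_t> Index;  // node -> position in Stack

  void add(int *N) {
    if (Index.emplace(N, Stack.size()).second)   // skip duplicates
      Stack.push_back(N);
  }

  void remove(int *N) {
    auto It = Index.find(N);
    if (It == Index.end())
      return;                      // not queued
    Stack[It->second] = nullptr;   // tombstone keeps other indices stable
    Index.erase(It);
  }

  int *popBack() {                 // pop from the back, skipping tombstones
    while (!Stack.empty()) {
      int *N = Stack.back();
      Stack.pop_back();
      if (N) {
        Index.erase(N);
        return N;
      }
    }
    return nullptr;
  }
};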
201
202
    void deleteAndRecombine(SDNode *N);
203
    bool recursivelyDeleteUnusedNodes(SDNode *N);
204
205
    /// Replaces all uses of the results of one DAG node with new values.
206
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207
                      bool AddTo = true);
208
209
    /// Replaces all uses of the results of one DAG node with new values.
210
1.68M
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211
1.68M
      return CombineTo(N, &Res, 1, AddTo);
212
1.68M
    }
213
214
    /// Replaces all uses of the results of one DAG node with new values.
215
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216
523k
                      bool AddTo = true) {
217
523k
      SDValue To[] = { Res0, Res1 };
218
523k
      return CombineTo(N, To, 2, AddTo);
219
523k
    }
220
221
    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222
223
  private:
224
    unsigned MaximumLegalStoreInBits;
225
226
    /// Check the specified integer node value to see if it can be simplified or
227
    /// if things it uses can be simplified by bit propagation.
228
    /// If so, return true.
229
11.8M
    bool SimplifyDemandedBits(SDValue Op) {
230
11.8M
      unsigned BitWidth = Op.getScalarValueSizeInBits();
231
11.8M
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
232
11.8M
      return SimplifyDemandedBits(Op, Demanded);
233
11.8M
    }
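Editor's note: the wrapper above seeds SimplifyDemandedBits with an all-ones mask of the result width; the underlying TargetLowering logic then proves that operand bits which cannot affect the demanded result bits may be ignored. A tiny arithmetic illustration of that idea on plain 32-bit masks (not the TLI machinery itself):

#include <cassert>
#include <cstdint>

int main() {
  // For ((x & 0x0000FF00) >> 8) only bits 8..15 of x are demanded, so
  // flipping every other bit of x cannot change the result.
  uint32_t x = 0xDEADBEEFu;
  uint32_t y = x ^ 0xFFFF00FFu;   // flip only the non-demanded bits
  assert(((x & 0x0000FF00u) >> 8) == ((y & 0x0000FF00u) >> 8));
  return 0;
}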
234
235
    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
236
237
    bool CombineToPreIndexedLoadStore(SDNode *N);
238
    bool CombineToPostIndexedLoadStore(SDNode *N);
239
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
240
    bool SliceUpLoad(SDNode *N);
241
242
    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
243
    ///   load.
244
    ///
245
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
246
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
247
    /// \param EltNo index of the vector element to load.
248
    /// \param OriginalLoad load that EVE came from to be replaced.
249
    /// \returns EVE on success SDValue() on failure.
250
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
251
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
252
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
253
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
254
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
255
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
256
    SDValue PromoteIntBinOp(SDValue Op);
257
    SDValue PromoteIntShiftOp(SDValue Op);
258
    SDValue PromoteExtend(SDValue Op);
259
    bool PromoteLoad(SDValue Op);
260
261
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
262
                         SDValue ExtLoad, const SDLoc &DL,
263
                         ISD::NodeType ExtType);
264
265
    /// Call the node-specific routine that knows how to fold each
266
    /// particular type of node. If that doesn't do anything, try the
267
    /// target-specific DAG combines.
268
    SDValue combine(SDNode *N);
269
270
    // Visitation implementation - Implement dag node combining for different
271
    // node types.  The semantics are as follows:
272
    // Return Value:
273
    //   SDValue.getNode() == 0 - No change was made
274
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
275
    //   otherwise              - N should be replaced by the returned Operand.
276
    //
277
    SDValue visitTokenFactor(SDNode *N);
278
    SDValue visitMERGE_VALUES(SDNode *N);
279
    SDValue visitADD(SDNode *N);
280
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
281
    SDValue visitSUB(SDNode *N);
282
    SDValue visitADDC(SDNode *N);
283
    SDValue visitUADDO(SDNode *N);
284
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
285
    SDValue visitSUBC(SDNode *N);
286
    SDValue visitUSUBO(SDNode *N);
287
    SDValue visitADDE(SDNode *N);
288
    SDValue visitADDCARRY(SDNode *N);
289
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
290
    SDValue visitSUBE(SDNode *N);
291
    SDValue visitSUBCARRY(SDNode *N);
292
    SDValue visitMUL(SDNode *N);
293
    SDValue useDivRem(SDNode *N);
294
    SDValue visitSDIV(SDNode *N);
295
    SDValue visitUDIV(SDNode *N);
296
    SDValue visitREM(SDNode *N);
297
    SDValue visitMULHU(SDNode *N);
298
    SDValue visitMULHS(SDNode *N);
299
    SDValue visitSMUL_LOHI(SDNode *N);
300
    SDValue visitUMUL_LOHI(SDNode *N);
301
    SDValue visitSMULO(SDNode *N);
302
    SDValue visitUMULO(SDNode *N);
303
    SDValue visitIMINMAX(SDNode *N);
304
    SDValue visitAND(SDNode *N);
305
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
306
    SDValue visitOR(SDNode *N);
307
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
308
    SDValue visitXOR(SDNode *N);
309
    SDValue SimplifyVBinOp(SDNode *N);
310
    SDValue visitSHL(SDNode *N);
311
    SDValue visitSRA(SDNode *N);
312
    SDValue visitSRL(SDNode *N);
313
    SDValue visitRotate(SDNode *N);
314
    SDValue visitABS(SDNode *N);
315
    SDValue visitBSWAP(SDNode *N);
316
    SDValue visitBITREVERSE(SDNode *N);
317
    SDValue visitCTLZ(SDNode *N);
318
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
319
    SDValue visitCTTZ(SDNode *N);
320
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
321
    SDValue visitCTPOP(SDNode *N);
322
    SDValue visitSELECT(SDNode *N);
323
    SDValue visitVSELECT(SDNode *N);
324
    SDValue visitSELECT_CC(SDNode *N);
325
    SDValue visitSETCC(SDNode *N);
326
    SDValue visitSETCCE(SDNode *N);
327
    SDValue visitSETCCCARRY(SDNode *N);
328
    SDValue visitSIGN_EXTEND(SDNode *N);
329
    SDValue visitZERO_EXTEND(SDNode *N);
330
    SDValue visitANY_EXTEND(SDNode *N);
331
    SDValue visitAssertZext(SDNode *N);
332
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
333
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
334
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
335
    SDValue visitTRUNCATE(SDNode *N);
336
    SDValue visitBITCAST(SDNode *N);
337
    SDValue visitBUILD_PAIR(SDNode *N);
338
    SDValue visitFADD(SDNode *N);
339
    SDValue visitFSUB(SDNode *N);
340
    SDValue visitFMUL(SDNode *N);
341
    SDValue visitFMA(SDNode *N);
342
    SDValue visitFDIV(SDNode *N);
343
    SDValue visitFREM(SDNode *N);
344
    SDValue visitFSQRT(SDNode *N);
345
    SDValue visitFCOPYSIGN(SDNode *N);
346
    SDValue visitSINT_TO_FP(SDNode *N);
347
    SDValue visitUINT_TO_FP(SDNode *N);
348
    SDValue visitFP_TO_SINT(SDNode *N);
349
    SDValue visitFP_TO_UINT(SDNode *N);
350
    SDValue visitFP_ROUND(SDNode *N);
351
    SDValue visitFP_ROUND_INREG(SDNode *N);
352
    SDValue visitFP_EXTEND(SDNode *N);
353
    SDValue visitFNEG(SDNode *N);
354
    SDValue visitFABS(SDNode *N);
355
    SDValue visitFCEIL(SDNode *N);
356
    SDValue visitFTRUNC(SDNode *N);
357
    SDValue visitFFLOOR(SDNode *N);
358
    SDValue visitFMINNUM(SDNode *N);
359
    SDValue visitFMAXNUM(SDNode *N);
360
    SDValue visitBRCOND(SDNode *N);
361
    SDValue visitBR_CC(SDNode *N);
362
    SDValue visitLOAD(SDNode *N);
363
364
    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
365
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
366
367
    SDValue visitSTORE(SDNode *N);
368
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
369
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
370
    SDValue visitBUILD_VECTOR(SDNode *N);
371
    SDValue visitCONCAT_VECTORS(SDNode *N);
372
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
373
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
374
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
375
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
376
    SDValue visitMLOAD(SDNode *N);
377
    SDValue visitMSTORE(SDNode *N);
378
    SDValue visitMGATHER(SDNode *N);
379
    SDValue visitMSCATTER(SDNode *N);
380
    SDValue visitFP_TO_FP16(SDNode *N);
381
    SDValue visitFP16_TO_FP(SDNode *N);
382
383
    SDValue visitFADDForFMACombine(SDNode *N);
384
    SDValue visitFSUBForFMACombine(SDNode *N);
385
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);
386
387
    SDValue XformToShuffleWithZero(SDNode *N);
388
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
389
                           SDValue RHS);
390
391
    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
392
393
    SDValue foldSelectOfConstants(SDNode *N);
394
    SDValue foldVSelectOfConstants(SDNode *N);
395
    SDValue foldBinOpIntoSelect(SDNode *BO);
396
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
397
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
398
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
399
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
400
                             SDValue N2, SDValue N3, ISD::CondCode CC,
401
                             bool NotExtCompare = false);
402
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
403
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
404
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
405
                              const SDLoc &DL);
406
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
407
                          const SDLoc &DL, bool foldBooleans = true);
408
409
    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
410
                           SDValue &CC) const;
411
    bool isOneUseSetCC(SDValue N) const;
412
413
    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
414
                                         unsigned HiOp);
415
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
416
    SDValue CombineExtLoad(SDNode *N);
417
    SDValue combineRepeatedFPDivisors(SDNode *N);
418
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
419
    SDValue BuildSDIV(SDNode *N);
420
    SDValue BuildSDIVPow2(SDNode *N);
421
    SDValue BuildUDIV(SDNode *N);
422
    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
423
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
424
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
425
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
426
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
427
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
428
                                SDNodeFlags Flags, bool Reciprocal);
429
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
430
                                SDNodeFlags Flags, bool Reciprocal);
431
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
432
                               bool DemandHighBits = true);
433
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
434
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
435
                              SDValue InnerPos, SDValue InnerNeg,
436
                              unsigned PosOpcode, unsigned NegOpcode,
437
                              const SDLoc &DL);
438
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
439
    SDValue MatchLoadCombine(SDNode *N);
440
    SDValue ReduceLoadWidth(SDNode *N);
441
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
442
    SDValue splitMergedValStore(StoreSDNode *ST);
443
    SDValue TransformFPLoadStorePair(SDNode *N);
444
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
445
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
446
    SDValue reduceBuildVecToShuffle(SDNode *N);
447
    SDValue reduceBuildVecToTrunc(SDNode *N);
448
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
449
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
450
                                  SDValue VecIn2, unsigned LeftIdx);
451
    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
452
453
    /// Walk up chain skipping non-aliasing memory nodes,
454
    /// looking for aliasing nodes and adding them to the Aliases vector.
455
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
456
                          SmallVectorImpl<SDValue> &Aliases);
457
458
    /// Return true if there is any possibility that the two addresses overlap.
459
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
460
461
    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
462
    /// chain (aliasing node.)
463
    SDValue FindBetterChain(SDNode *N, SDValue Chain);
464
465
    /// Try to replace a store and any possibly adjacent stores on
466
    /// consecutive chains with better chains. Return true only if St is
467
    /// replaced.
468
    ///
469
    /// Notice that other chains may still be replaced even if the function
470
    /// returns false.
471
    bool findBetterNeighborChains(StoreSDNode *St);
472
473
    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
474
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
475
476
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
477
    /// is located in a sequence of memory operations connected by a chain.
478
    struct MemOpLink {
479
      // Ptr to the mem node.
480
      LSBaseSDNode *MemNode;
481
482
      // Offset from the base ptr.
483
      int64_t OffsetFromBase;
484
485
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
486
21.9M
          : MemNode(N), OffsetFromBase(Offset) {}
487
    };
488
489
    /// This is a helper function for visitMUL to check the profitability
490
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
491
    /// MulNode is the original multiply, AddNode is (add x, c1),
492
    /// and ConstNode is c2.
493
    bool isMulAddWithConstProfitable(SDNode *MulNode,
494
                                     SDValue &AddNode,
495
                                     SDValue &ConstNode);
496
497
    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
498
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
499
    /// the type of the loaded value to be extended.  LoadedVT returns the type
500
    /// of the original loaded value.  NarrowLoad returns whether the load would
501
    /// need to be narrowed in order to match.
502
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
503
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
504
                          bool &NarrowLoad);
505
506
    /// Helper function for MergeConsecutiveStores which merges the
507
    /// component store chains.
508
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
509
                                unsigned NumStores);
510
511
    /// This is a helper function for MergeConsecutiveStores. When the
512
    /// source elements of the consecutive stores are all constants or
513
    /// all extracted vector elements, try to merge them into one
514
    /// larger store introducing bitcasts if necessary.  \return True
515
    /// if a merged store was created.
516
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
517
                                         EVT MemVT, unsigned NumStores,
518
                                         bool IsConstantSrc, bool UseVector,
519
                                         bool UseTrunc);
520
521
    /// This is a helper function for MergeConsecutiveStores. Stores
522
    /// that potentially may be merged with St are placed in
523
    /// StoreNodes.
524
    void getStoreMergeCandidates(StoreSDNode *St,
525
                                 SmallVectorImpl<MemOpLink> &StoreNodes);
526
527
    /// Helper function for MergeConsecutiveStores. Checks if
528
    /// candidate stores have indirect dependency through their
529
    /// operands. \return True if safe to merge.
530
    bool checkMergeStoreCandidatesForDependencies(
531
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
532
533
    /// Merge consecutive store operations into a wide store.
534
    /// This optimization uses wide integers or vectors when possible.
535
    /// \return number of stores that were merged into a merged store (the
536
    /// affected nodes are stored as a prefix in \p StoreNodes).
537
    bool MergeConsecutiveStores(StoreSDNode *N);
538
539
    /// \brief Try to transform a truncation where C is a constant:
540
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
541
    ///
542
    /// \p N needs to be a truncation and its first operand an AND. Other
543
    /// requirements are checked by the function (e.g. that trunc is
544
    /// single-use) and if missed an empty SDValue is returned.
545
    SDValue distributeTruncateThroughAnd(SDNode *N);
546
547
  public:
548
    /// Runs the dag combiner on all nodes in the work list
549
    void Run(CombineLevel AtLevel);
550
551
181M
    SelectionDAG &getDAG() const { return DAG; }
552
553
    /// Returns a type large enough to hold any valid shift amount - before type
554
    /// legalization these can be huge.
555
40.4k
    EVT getShiftAmountTy(EVT LHSTy) {
556
40.4k
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
557
40.4k
      if (LHSTy.isVector())
558
358
        return LHSTy;
559
40.0k
      auto &DL = DAG.getDataLayout();
560
7.77k
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
561
32.3k
                        : TLI.getPointerTy(DL);
562
40.4k
    }
563
564
    /// This method returns true if we are running before type legalization or
565
    /// if the specified VT is legal.
566
446k
    bool isTypeLegal(const EVT &VT) {
567
446k
      if (!LegalTypes) return true;
568
280k
      return TLI.isTypeLegal(VT);
569
280k
    }
570
571
    /// Convenience wrapper around TargetLowering::getSetCCResultType
572
2.58M
    EVT getSetCCResultType(EVT VT) const {
573
2.58M
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
574
2.58M
    }
575
  };
576
577
/// This class is a DAGUpdateListener that removes any deleted
578
/// nodes from the worklist.
579
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
580
  DAGCombiner &DC;
581
582
public:
583
  explicit WorklistRemover(DAGCombiner &dc)
584
181M
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
585
586
92.1k
  void NodeDeleted(SDNode *N, SDNode *E) override {
587
92.1k
    DC.removeFromWorklist(N);
588
92.1k
  }
589
};
590
591
} // end anonymous namespace
592
593
//===----------------------------------------------------------------------===//
594
//  TargetLowering::DAGCombinerInfo implementation
595
//===----------------------------------------------------------------------===//
596
597
18.1k
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
598
18.1k
  ((DAGCombiner*)DC)->AddToWorklist(N);
599
18.1k
}
600
601
SDValue TargetLowering::DAGCombinerInfo::
602
1.67k
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
603
1.67k
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
604
1.67k
}
605
606
SDValue TargetLowering::DAGCombinerInfo::
607
7.25k
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
608
7.25k
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
609
7.25k
}
610
611
SDValue TargetLowering::DAGCombinerInfo::
612
4.31k
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
613
4.31k
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
614
4.31k
}
615
616
void TargetLowering::DAGCombinerInfo::
617
327
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
618
327
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
619
327
}
620
621
//===----------------------------------------------------------------------===//
622
// Helper Functions
623
//===----------------------------------------------------------------------===//
624
625
2.94M
void DAGCombiner::deleteAndRecombine(SDNode *N) {
626
2.94M
  removeFromWorklist(N);
627
2.94M
628
2.94M
  // If the operands of this node are only used by the node, they will now be
629
2.94M
  // dead. Make sure to re-visit them and recursively delete dead nodes.
630
2.94M
  for (const SDValue &Op : N->ops())
631
2.94M
    // For an operand generating multiple values, one of the values may
632
2.94M
    // become dead allowing further simplification (e.g. split index
633
2.94M
    // arithmetic from an indexed load).
634
8.61M
    
    if (Op->hasOneUse() || Op->getNumValues() > 1)
635
2.29M
      AddToWorklist(Op.getNode());
636
2.94M
637
2.94M
  DAG.DeleteNode(N);
638
2.94M
}
639
640
/// Return 1 if we can compute the negated form of the specified expression for
641
/// the same cost as the expression itself, or 2 if we can compute the negated
642
/// form more cheaply than the expression itself.
643
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
644
                               const TargetLowering &TLI,
645
                               const TargetOptions *Options,
646
774k
                               unsigned Depth = 0) {
647
774k
  // fneg is removable even if it has multiple uses.
648
774k
  if (Op.getOpcode() == ISD::FNEG) return 2;
649
773k
650
773k
  // Don't allow anything with multiple uses.
651
773k
  
  if (!Op.hasOneUse()) return 0;
652
532k
653
532k
  // Don't recurse exponentially.
654
532k
  
  if (Depth > 6) return 0;
655
530k
656
530k
  switch (Op.getOpcode()) {
657
309k
  default: return false;
658
43.2k
  case ISD::ConstantFP: {
659
43.2k
    if (!LegalOperations)
660
32.9k
      return 1;
661
10.3k
662
10.3k
    // Don't invert constant FP values after legalization unless the target says
663
10.3k
    // the negated constant is legal.
664
10.3k
    EVT VT = Op.getValueType();
665
10.3k
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
666
9.79k
      TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
667
10.3k
  }
668
66.8k
  case ISD::FADD:
669
66.8k
    // FIXME: determine better conditions for this xform.
670
66.8k
    if (!Options->UnsafeFPMath) return 0;
671
1.08k
672
1.08k
    // After operation legalization, it might not be legal to create new FSUBs.
673
1.08k
    
    if (LegalOperations &&
674
427
        !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
675
0
      return 0;
676
1.08k
677
1.08k
    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
678
1.08k
    
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
679
1.08k
                                    Options, Depth + 1))
680
19
      return V;
681
1.06k
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
682
1.06k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
683
1.06k
                              Depth + 1);
684
8.26k
  case ISD::FSUB:
685
8.26k
    // We can't turn -(A-B) into B-A when we honor signed zeros.
686
8.26k
    if (!Options->NoSignedZerosFPMath &&
687
8.23k
        !Op.getNode()->getFlags().hasNoSignedZeros())
688
7.68k
      return 0;
689
578
690
578
    // fold (fneg (fsub A, B)) -> (fsub B, A)
691
578
    return 1;
692
578
693
96.8k
  case ISD::FMUL:
694
96.8k
  case ISD::FDIV:
695
96.8k
    if (Options->HonorSignDependentRoundingFPMath()) return 0;
696
96.8k
697
96.8k
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
698
96.8k
    
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
699
96.8k
                                    Options, Depth + 1))
700
697
      return V;
701
96.1k
702
96.1k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
703
96.1k
                              Depth + 1);
704
96.1k
705
5.48k
  case ISD::FP_EXTEND:
706
5.48k
  case ISD::FP_ROUND:
707
5.48k
  case ISD::FSIN:
708
5.48k
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
709
5.48k
                              Depth + 1);
710
0
  }
711
0
}
712
713
/// If isNegatibleForFree returns true, return the newly negated expression.
714
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
715
2.59k
                                    bool LegalOperations, unsigned Depth = 0) {
716
2.59k
  const TargetOptions &Options = DAG.getTarget().Options;
717
2.59k
  // fneg is removable even if it has multiple uses.
718
2.59k
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
719
1.90k
720
1.90k
  // Don't allow anything with multiple uses.
721
2.59k
  assert(Op.hasOneUse() && "Unknown reuse!");
722
1.90k
723
1.90k
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
724
1.90k
725
1.90k
  const SDNodeFlags Flags = Op.getNode()->getFlags();
726
1.90k
727
1.90k
  switch (Op.getOpcode()) {
728
0
  
  default: llvm_unreachable("Unknown code");
729
732
  case ISD::ConstantFP: {
730
732
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
731
732
    V.changeSign();
732
732
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
733
1.90k
  }
734
18
  case ISD::FADD:
735
18
    // FIXME: determine better conditions for this xform.
736
18
    assert(Options.UnsafeFPMath);
737
18
738
18
    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
739
18
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
740
18
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
741
18
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
742
18
                         GetNegatedExpression(Op.getOperand(0), DAG,
743
18
                                              LegalOperations, Depth+1),
744
18
                         Op.getOperand(1), Flags);
745
0
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
746
0
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
747
0
                       GetNegatedExpression(Op.getOperand(1), DAG,
748
0
                                            LegalOperations, Depth+1),
749
0
                       Op.getOperand(0), Flags);
750
19
  case ISD::FSUB:
751
19
    // fold (fneg (fsub 0, B)) -> B
752
19
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
753
5
      
      if (N0CFP->isZero())
754
5
        return Op.getOperand(1);
755
14
756
14
    // fold (fneg (fsub A, B)) -> (fsub B, A)
757
14
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
758
14
                       Op.getOperand(1), Op.getOperand(0), Flags);
759
14
760
1.10k
  case ISD::FMUL:
761
1.10k
  case ISD::FDIV:
762
1.10k
    assert(!Options.HonorSignDependentRoundingFPMath());
763
1.10k
764
1.10k
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
765
1.10k
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
766
1.10k
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
767
289
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
768
289
                         GetNegatedExpression(Op.getOperand(0), DAG,
769
289
                                              LegalOperations, Depth+1),
770
289
                         Op.getOperand(1), Flags);
771
820
772
820
    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
773
820
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
774
820
                       Op.getOperand(0),
775
820
                       GetNegatedExpression(Op.getOperand(1), DAG,
776
820
                                            LegalOperations, Depth+1), Flags);
777
820
778
18
  case ISD::FP_EXTEND:
779
18
  case ISD::FSIN:
780
18
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
781
18
                       GetNegatedExpression(Op.getOperand(0), DAG,
782
18
                                            LegalOperations, Depth+1));
783
12
  case ISD::FP_ROUND:
784
12
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
785
12
                         GetNegatedExpression(Op.getOperand(0), DAG,
786
12
                                              LegalOperations, Depth+1),
787
12
                         Op.getOperand(1));
788
0
  }
789
0
}
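Editor's note: isNegatibleForFree and GetNegatedExpression above push an FNEG into a position where it is free, e.g. -(A+B) -> (-A)-B (only under unsafe-math because of signed zeros), -(A-B) -> B-A, and -(A*B) -> (-A)*B. A quick numeric check of those identities with doubles; the values are arbitrary and the check is illustrative, not exhaustive over FP corner cases:

#include <cassert>

int main() {
  double A = 3.5, B = -1.25;
  assert(-(A + B) == (-A) - B); // fneg (fadd A, B) -> fsub (fneg A), B
  assert(-(A - B) == B - A);    // fneg (fsub A, B) -> fsub B, A
  assert(-(A * B) == (-A) * B); // fneg (fmul A, B) -> fmul (fneg A), B
  assert(-(A / B) == A / (-B)); // fneg (fdiv A, B) -> fdiv A, (fneg B)
  return 0;
}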
790
791
// APInts must be the same size for most operations, this helper
792
// function zero extends the shorter of the pair so that they match.
793
// We provide an Offset so that we can create bitwidths that won't overflow.
794
11.4k
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
795
11.4k
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
796
11.4k
  LHS = LHS.zextOrSelf(Bits);
797
11.4k
  RHS = RHS.zextOrSelf(Bits);
798
11.4k
}
799
800
// Return true if this node is a setcc, or is a select_cc
801
// that selects between the target values used for true and false, making it
802
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
803
// the appropriate nodes based on the type of node we are checking. This
804
// simplifies life a bit for the callers.
805
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
806
2.25M
                                    SDValue &CC) const {
807
2.25M
  if (N.getOpcode() == ISD::SETCC) {
808
1.12M
    LHS = N.getOperand(0);
809
1.12M
    RHS = N.getOperand(1);
810
1.12M
    CC  = N.getOperand(2);
811
1.12M
    return true;
812
1.12M
  }
813
1.13M
814
1.13M
  
  if (N.getOpcode() != ISD::SELECT_CC ||
815
5.05k
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
816
294
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
817
1.13M
    return false;
818
288
819
288
  
  if (TLI.getBooleanContents(N.getValueType()) ==
820
288
      TargetLowering::UndefinedBooleanContent)
821
0
    return false;
822
288
823
288
  LHS = N.getOperand(0);
824
288
  RHS = N.getOperand(1);
825
288
  CC  = N.getOperand(4);
826
288
  return true;
827
288
}
828
829
/// Return true if this is a SetCC-equivalent operation with only one use.
830
/// If this is true, it allows the users to invert the operation for free when
831
/// it is profitable to do so.
832
158
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
833
158
  SDValue N0, N1, N2;
834
158
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
835
73
    return true;
836
85
  return false;
837
85
}
838
839
// \brief Returns the SDNode if it is a constant float BuildVector
840
// or constant float.
841
3.71M
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
842
3.71M
  if (isa<ConstantFPSDNode>(N))
843
138k
    return N.getNode();
844
3.57M
  
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
845
14.2k
    return N.getNode();
846
3.56M
  return nullptr;
847
3.56M
}
848
849
// Determines if it is a constant integer or a build vector of constant
850
// integers (and undefs).
851
// Do not permit build vector implicit truncation.
852
23.8M
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
853
23.8M
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
854
18.7M
    
    return !(Const->isOpaque() && NoOpaques);
855
5.15M
  
  if (N.getOpcode() != ISD::BUILD_VECTOR)
856
5.02M
    return false;
857
124k
  unsigned BitWidth = N.getScalarValueSizeInBits();
858
417k
  for (const SDValue &Op : N->op_values()) {
859
417k
    if (Op.isUndef())
860
882
      continue;
861
416k
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
862
416k
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
863
399k
        
        (Const->isOpaque() && NoOpaques))
864
16.6k
      return false;
865
107k
  }
866
107k
  return true;
867
107k
}
868
869
// Determines if it is a constant null integer or a splatted vector of a
870
// constant null integer (with no undefs).
871
// Build vector implicit truncation is not an issue for null values.
872
1.51M
static bool isNullConstantOrNullSplatConstant(SDValue N) {
873
1.51M
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
874
159k
    return Splat->isNullValue();
875
1.35M
  return false;
876
1.35M
}
877
878
// Determines if it is a constant integer of one or a splatted vector of a
879
// constant integer of one (with no undefs).
880
// Do not permit build vector implicit truncation.
881
4.32k
static bool isOneConstantOrOneSplatConstant(SDValue N) {
882
4.32k
  unsigned BitWidth = N.getScalarValueSizeInBits();
883
4.32k
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
884
3.90k
    
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
885
415
  return false;
886
415
}
887
888
// Determines if it is a constant integer of all ones or a splatted vector of a
889
// constant integer of all ones (with no undefs).
890
// Do not permit build vector implicit truncation.
891
426k
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
892
426k
  unsigned BitWidth = N.getScalarValueSizeInBits();
893
426k
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
894
123k
    return Splat->isAllOnesValue() &&
895
418
           Splat->getAPIntValue().getBitWidth() == BitWidth;
896
303k
  return false;
897
303k
}
898
899
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
900
// undef's.
901
18.6k
static bool isAnyConstantBuildVector(const SDNode *N) {
902
18.6k
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
903
16.5k
         ISD::isBuildVectorOfConstantFPSDNodes(N);
904
18.6k
}
905
906
// Attempt to match a unary predicate against a scalar/splat constant or
907
// every element of a constant BUILD_VECTOR.
908
static bool matchUnaryPredicate(SDValue Op,
909
1.12M
                                std::function<bool(ConstantSDNode *)> Match) {
910
1.12M
  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
911
1.03M
    return Match(Cst);
912
85.0k
913
85.0k
  
  if (ISD::BUILD_VECTOR != Op.getOpcode())
914
75.1k
    return false;
915
9.99k
916
9.99k
  EVT SVT = Op.getValueType().getScalarType();
917
10.1k
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
918
10.1k
    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
919
10.1k
    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
920
9.96k
      return false;
921
10.1k
  }
922
28
  return true;
923
1.12M
}
924
925
// Attempt to match a binary predicate against a pair of scalar/splat constants
926
// or every element of a pair of constant BUILD_VECTORs.
927
static bool matchBinaryPredicate(
928
    SDValue LHS, SDValue RHS,
929
16.4k
    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
930
16.4k
  if (LHS.getValueType() != RHS.getValueType())
931
232
    return false;
932
16.2k
933
16.2k
  
  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
934
14.0k
    
    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
935
12.5k
      return Match(LHSCst, RHSCst);
936
3.69k
937
3.69k
  
  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
938
158
      ISD::BUILD_VECTOR != RHS.getOpcode())
939
3.53k
    return false;
940
158
941
158
  EVT SVT = LHS.getValueType().getScalarType();
942
1.25k
  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
943
1.11k
    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
944
1.11k
    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
945
1.11k
    if (!LHSCst || !RHSCst)
946
0
      return false;
947
1.11k
    
    if (LHSCst->getValueType(0) != SVT ||
948
1.11k
        LHSCst->getValueType(0) != RHSCst->getValueType(0))
949
0
      return false;
950
1.11k
    
    if (!Match(LHSCst, RHSCst))
951
19
      return false;
952
1.11k
  }
953
139
  return true;
954
16.4k
}
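Editor's note: matchUnaryPredicate/matchBinaryPredicate above accept either a single constant or a constant BUILD_VECTOR and require the predicate to hold for the scalar or for every element. A rough standard-C++ analogue of that shape, with a plain struct standing in (as an assumption) for the SDValue cases:

#include <cstdint>
#include <functional>
#include <vector>

// Either one scalar constant or a list of element constants.
struct ConstOrVec {
  bool IsScalar;
  int64_t Scalar;
  std::vector<int64_t> Elts;
};

static bool matchUnary(const ConstOrVec &Op,
                       const std::function<bool(int64_t)> &Match) {
  if (Op.IsScalar)
    return Match(Op.Scalar);
  for (int64_t E : Op.Elts)
    if (!Match(E))        // every element must satisfy the predicate
      return false;
  return true;
}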
955
956
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
957
11.9M
                                    SDValue N1) {
958
11.9M
  EVT VT = N0.getValueType();
959
11.9M
  if (N0.getOpcode() == Opc) {
960
1.21M
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
961
590k
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
962
499k
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
963
499k
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
964
499k
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
965
70
        return SDValue();
966
70
      }
967
91.5k
      
      if (N0.hasOneUse()) {
968
63.6k
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
969
63.6k
        // use
970
63.6k
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
971
63.6k
        if (!OpNode.getNode())
972
0
          return SDValue();
973
63.6k
        AddToWorklist(OpNode.getNode());
974
63.6k
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
975
63.6k
      }
976
590k
    }
977
1.21M
  }
978
11.4M
979
11.4M
  
  if (N1.getOpcode() == Opc) {
980
66.4k
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
981
34.1k
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
982
0
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
983
0
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
984
0
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
985
0
        return SDValue();
986
0
      }
987
34.1k
      
      if (N1.hasOneUse()) {
988
24.2k
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
989
24.2k
        // use
990
24.2k
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
991
24.2k
        if (!OpNode.getNode())
992
0
          return SDValue();
993
24.2k
        AddToWorklist(OpNode.getNode());
994
24.2k
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
995
24.2k
      }
996
34.1k
    }
997
66.4k
  }
998
11.3M
999
11.3M
  return SDValue();
1000
11.3M
}
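Editor's note: ReassociateOps above implements (op (op x, c1), c2) -> (op x, (op c1, c2)) so two constants fold into one, plus one-use variants that move a constant outward. A small integer check of the underlying identities for an associative, commutative op (add), illustration only:

#include <cassert>
#include <cstdint>

int main() {
  int64_t x = 12345, y = -42, c1 = 7, c2 = 100;
  // reassoc. (add (add x, c1), c2) -> (add x, (add c1, c2))
  assert((x + c1) + c2 == x + (c1 + c2));
  // one-use variant: (add (add x, c1), y) -> (add (add x, y), c1)
  assert((x + c1) + y == (x + y) + c1);
  return 0;
}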
1001
1002
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1003
2.20M
                               bool AddTo) {
1004
2.20M
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1005
2.20M
  ++NodesCombined;
1006
2.20M
  DEBUG(dbgs() << "\nReplacing.1 ";
1007
2.20M
        N->dump(&DAG);
1008
2.20M
        dbgs() << "\nWith: ";
1009
2.20M
        To[0].getNode()->dump(&DAG);
1010
2.20M
        dbgs() << " and " << NumTo-1 << " other values\n");
1011
4.94M
  for (unsigned i = 0, e = NumTo; i != e; ++i)
1012
2.20M
    assert((!To[i].getNode() ||
1013
2.20M
            N->getValueType(i) == To[i].getValueType()) &&
1014
2.20M
           "Cannot combine value to value of different type!");
1015
2.20M
1016
2.20M
  WorklistRemover DeadNodes(*this);
1017
2.20M
  DAG.ReplaceAllUsesWith(N, To);
1018
2.20M
  if (AddTo) {
1019
1.26M
    // Push the new nodes and any users onto the worklist
1020
3.05M
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
1021
1.79M
      if (To[i].getNode()) {
1022
1.79M
        AddToWorklist(To[i].getNode());
1023
1.79M
        AddUsersToWorklist(To[i].getNode());
1024
1.79M
      }
1025
1.79M
    }
1026
1.26M
  }
1027
2.20M
1028
2.20M
  // Finally, if the node is now dead, remove it from the graph.  The node
1029
2.20M
  // may not be dead if the replacement process recursively simplified to
1030
2.20M
  // something else needing this node.
1031
2.20M
  if (N->use_empty())
1032
2.20M
    deleteAndRecombine(N);
1033
2.20M
  return SDValue(N, 0);
1034
2.20M
}
1035
1036
void DAGCombiner::
1037
458k
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1038
458k
  // Replace all uses.  If any nodes become isomorphic to other nodes and
1039
458k
  // are deleted, make sure to remove them from our worklist.
1040
458k
  WorklistRemover DeadNodes(*this);
1041
458k
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1042
458k
1043
458k
  // Push the new node and any (possibly new) users onto the worklist.
1044
458k
  AddToWorklist(TLO.New.getNode());
1045
458k
  AddUsersToWorklist(TLO.New.getNode());
1046
458k
1047
458k
  // Finally, if the node is now dead, remove it from the graph.  The node
1048
458k
  // may not be dead if the replacement process recursively simplified to
1049
458k
  // something else needing this node.
1050
458k
  if (TLO.Old.getNode()->use_empty())
1051
452k
    deleteAndRecombine(TLO.Old.getNode());
1052
458k
}
1053
1054
/// Check the specified integer node value to see if it can be simplified or if
1055
/// things it uses can be simplified by bit propagation. If so, return true.
1056
12.7M
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1057
12.7M
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1058
12.7M
  KnownBits Known;
1059
12.7M
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1060
12.3M
    return false;
1061
458k
1062
458k
  // Revisit the node.
1063
458k
  AddToWorklist(Op.getNode());
1064
458k
1065
458k
  // Replace the old value with the new one.
1066
458k
  ++NodesCombined;
1067
458k
  DEBUG(dbgs() << "\nReplacing.2 ";
1068
12.7M
        TLO.Old.getNode()->dump(&DAG);
1069
12.7M
        dbgs() << "\nWith: ";
1070
12.7M
        TLO.New.getNode()->dump(&DAG);
1071
12.7M
        dbgs() << '\n');
1072
12.7M
1073
12.7M
  CommitTargetLoweringOpt(TLO);
1074
12.7M
  return true;
1075
12.7M
}
1076
1077
50
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1078
50
  SDLoc DL(Load);
1079
50
  EVT VT = Load->getValueType(0);
1080
50
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1081
50
1082
50
  DEBUG(dbgs() << "\nReplacing.9 ";
1083
50
        Load->dump(&DAG);
1084
50
        dbgs() << "\nWith: ";
1085
50
        Trunc.getNode()->dump(&DAG);
1086
50
        dbgs() << '\n');
1087
50
  WorklistRemover DeadNodes(*this);
1088
50
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1089
50
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1090
50
  deleteAndRecombine(Load);
1091
50
  AddToWorklist(Trunc.getNode());
1092
50
}
1093
1094
3.35k
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1095
3.35k
  Replace = false;
1096
3.35k
  SDLoc DL(Op);
1097
3.35k
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1098
103
    LoadSDNode *LD = cast<LoadSDNode>(Op);
1099
103
    EVT MemVT = LD->getMemoryVT();
1100
103
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1101
73
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1102
73
                                                       : ISD::EXTLOAD)
1103
30
      : LD->getExtensionType();
1104
103
    Replace = true;
1105
103
    return DAG.getExtLoad(ExtType, DL, PVT,
1106
103
                          LD->getChain(), LD->getBasePtr(),
1107
103
                          MemVT, LD->getMemOperand());
1108
103
  }
1109
3.25k
1110
3.25k
  unsigned Opc = Op.getOpcode();
1111
3.25k
  switch (Opc) {
1112
2.39k
  default: break;
1113
9
  case ISD::AssertSext:
1114
9
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1115
9
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1116
0
    break;
1117
33
  case ISD::AssertZext:
1118
33
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1119
33
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1120
0
    break;
1121
817
  case ISD::Constant: {
1122
817
    unsigned ExtOpc =
1123
817
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1124
817
    return DAG.getNode(ExtOpc, DL, PVT, Op);
1125
2.39k
  }
1126
2.39k
  }
1127
2.39k
1128
2.39k
  
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1129
0
    return SDValue();
1130
2.39k
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1131
2.39k
}
1132
1133
9
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1134
9
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1135
0
    return SDValue();
1136
9
  EVT OldVT = Op.getValueType();
1137
9
  SDLoc DL(Op);
1138
9
  bool Replace = false;
1139
9
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1140
9
  if (!NewOp.getNode())
1141
0
    return SDValue();
1142
9
  AddToWorklist(NewOp.getNode());
1143
9
1144
9
  if (Replace)
1145
0
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1146
9
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1147
9
                     DAG.getValueType(OldVT));
1148
9
}
1149
1150
770
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1151
770
  EVT OldVT = Op.getValueType();
1152
770
  SDLoc DL(Op);
1153
770
  bool Replace = false;
1154
770
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1155
770
  if (!NewOp.getNode())
1156
0
    return SDValue();
1157
770
  AddToWorklist(NewOp.getNode());
1158
770
1159
770
  if (Replace)
1160
5
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1161
770
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1162
770
}
1163
1164
/// Promote the specified integer binary operation if the target indicates it is
1165
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1166
/// i32 since i16 instructions are longer.
1167
10.3M
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1168
10.3M
  if (!LegalOperations)
1169
5.85M
    return SDValue();
1170
4.48M
1171
4.48M
  EVT VT = Op.getValueType();
1172
4.48M
  if (VT.isVector() || !VT.isInteger())
1173
145k
    return SDValue();
1174
4.33M
1175
4.33M
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1176
4.33M
  // promoting it.
1177
4.33M
  unsigned Opc = Op.getOpcode();
1178
4.33M
  if (TLI.isTypeDesirableForOp(Opc, VT))
1179
4.33M
    return SDValue();
1180
1.41k
1181
1.41k
  EVT PVT = VT;
1182
1.41k
  // Consult target whether it is a good idea to promote this operation and
1183
1.41k
  // what's the right type to promote it to.
1184
1.41k
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1185
1.24k
    assert(PVT != VT && "Don't know what type to promote to!");
1186
1.24k
1187
1.24k
    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1188
1.24k
1189
1.24k
    bool Replace0 = false;
1190
1.24k
    SDValue N0 = Op.getOperand(0);
1191
1.24k
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1192
1.24k
1193
1.24k
    bool Replace1 = false;
1194
1.24k
    SDValue N1 = Op.getOperand(1);
1195
1.24k
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1196
1.24k
    SDLoc DL(Op);
1197
1.24k
1198
1.24k
    SDValue RV =
1199
1.24k
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1200
1.24k
1201
1.24k
    // We are always replacing N0/N1's use in N and only need
1202
1.24k
    // additional replacements if there are additional uses.
1203
1.24k
    Replace0 &= !N0->hasOneUse();
1204
1.23k
    Replace1 &= (N0 != N1) && !N1->hasOneUse();
1205
1.24k
1206
1.24k
    // Combine Op here so it is preserved past replacements.
1207
1.24k
    CombineTo(Op.getNode(), RV);
1208
1.24k
1209
1.24k
    // If operands have a use ordering, make sure we deal with
1210
1.24k
    // the predecessor first.
1211
1.24k
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1212
0
      std::swap(N0, N1);
1213
0
      std::swap(NN0, NN1);
1214
0
    }
1215
1.24k
1216
1.24k
    if (Replace0) {
1217
17
      AddToWorklist(NN0.getNode());
1218
17
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1219
17
    }
1220
1.24k
    if (Replace1) {
1221
11
      AddToWorklist(NN1.getNode());
1222
11
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1223
11
    }
1224
1.24k
    return Op;
1225
1.24k
  }
1226
170
  return SDValue();
1227
170
}
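Editor's note: PromoteIntBinOp above rewrites an 'undesirable' narrow operation (e.g. i16 on x86) as the same operation in a wider type followed by a truncate. For wrap-around integer arithmetic the result is unchanged, which is what makes the rewrite safe; a short check of that equivalence with uint16_t promoted to uint32_t (illustrative, not the target-query logic):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t a = 0xFFF0, b = 0x0123;
  uint16_t narrow = static_cast<uint16_t>(a + b);               // narrow add
  uint32_t wide = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);
  uint16_t promoted = static_cast<uint16_t>(wide);              // trunc(wide add)
  assert(narrow == promoted);
  return 0;
}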
1228
1229
/// Promote the specified integer shift operation if the target indicates it is
1230
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1231
/// i32 since i16 instructions are longer.
1232
1.05M
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1233
1.05M
  if (!LegalOperations)
1234
572k
    return SDValue();
1235
479k
1236
479k
  EVT VT = Op.getValueType();
1237
479k
  if (VT.isVector() || !VT.isInteger())
1238
1.63k
    return SDValue();
1239
477k
1240
477k
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1241
477k
  // promoting it.
1242
477k
  unsigned Opc = Op.getOpcode();
1243
477k
  if (TLI.isTypeDesirableForOp(Opc, VT))
1244
474k
    return SDValue();
1245
2.87k
1246
2.87k
  EVT PVT = VT;
1247
2.87k
  // Consult target whether it is a good idea to promote this operation and
1248
2.87k
  // what's the right type to promote it to.
1249
2.87k
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1250
827
    assert(PVT != VT && "Don't know what type to promote to!");
1251
827
1252
827
    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1253
827
1254
827
    bool Replace = false;
1255
827
    SDValue N0 = Op.getOperand(0);
1256
827
    SDValue N1 = Op.getOperand(1);
1257
827
    if (Opc == ISD::SRA)
1258
0
      N0 = SExtPromoteOperand(N0, PVT);
1259
827
    else if (Opc == ISD::SRL)
1260
737
      N0 = ZExtPromoteOperand(N0, PVT);
1261
827
    else
1262
90
      N0 = PromoteOperand(N0, PVT, Replace);
1263
827
1264
827
    if (!N0.getNode())
1265
0
      return SDValue();
1266
827
1267
827
    SDLoc DL(Op);
1268
827
    SDValue RV =
1269
827
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1270
827
1271
827
    AddToWorklist(N0.getNode());
1272
827
    if (Replace)
1273
17
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1274
827
1275
827
    // Deal with Op being deleted.
1276
827
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1277
826
      return RV;
1278
2.05k
  }
1279
2.05k
  return SDValue();
1280
2.05k
}
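The branch on the shift opcode above matters because the widened operand must carry the right bits above the original width: an SRL needs zeros there (ZExtPromoteOperand) while an SRA needs copies of the sign bit (SExtPromoteOperand). A standalone sketch of the SRL case, with i16 promoted to i32 as the assumed widths (plain C++, not the DAG API):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t x = 0x8001;
  unsigned n = 4;
  uint16_t narrow = static_cast<uint16_t>(x >> n); // i16 srl
  // Correct promotion for SRL: zero-extend, shift in i32, truncate.
  uint32_t zext = x;
  assert(static_cast<uint16_t>(zext >> n) == narrow);
  // Sign-extending instead shifts copies of the sign bit down into the
  // 16-bit window and changes the truncated result.
  uint32_t sext = static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(x)));
  assert(static_cast<uint16_t>(sext >> n) != narrow);
  return 0;
}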
1281
1282
787k
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1283
787k
  if (!LegalOperations)
1284
466k
    return SDValue();
1285
320k
1286
320k
  EVT VT = Op.getValueType();
1287
320k
  if (VT.isVector() || !VT.isInteger())
1288
50.2k
    return SDValue();
1289
270k
1290
270k
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1291
270k
  // promoting it.
1292
270k
  unsigned Opc = Op.getOpcode();
1293
270k
  if (TLI.isTypeDesirableForOp(Opc, VT))
1294
270k
    return SDValue();
1295
108
1296
108
  EVT PVT = VT;
1297
108
  // Consult target whether it is a good idea to promote this operation and
1298
108
  // what's the right type to promote it to.
1299
108
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1300
104
    assert(PVT != VT && "Don't know what type to promote to!");
1301
104
    // fold (aext (aext x)) -> (aext x)
1302
104
    // fold (aext (zext x)) -> (zext x)
1303
104
    // fold (aext (sext x)) -> (sext x)
1304
104
    DEBUG(dbgs() << "\nPromoting ";
1305
104
          Op.getNode()->dump(&DAG));
1306
104
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1307
104
  }
1308
4
  return SDValue();
1309
4
}
1310
1311
5.67M
bool DAGCombiner::PromoteLoad(SDValue Op) {
1312
5.67M
  if (!LegalOperations)
1313
3.40M
    return false;
1314
2.26M
1315
2.26M
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1316
10.3k
    return false;
1317
2.25M
1318
2.25M
  EVT VT = Op.getValueType();
1319
2.25M
  if (VT.isVector() || !VT.isInteger())
1320
300k
    return false;
1321
1.95M
1322
1.95M
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1323
1.95M
  // promoting it.
1324
1.95M
  unsigned Opc = Op.getOpcode();
1325
1.95M
  if (TLI.isTypeDesirableForOp(Opc, VT))
1326
1.95M
    return false;
1327
1.42k
1328
1.42k
  EVT PVT = VT;
1329
1.42k
  // Consult target whether it is a good idea to promote this operation and
1330
1.42k
  // what's the right type to promote it to.
1331
1.42k
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1332
0
    assert(PVT != VT && "Don't know what type to promote to!");
1333
0
1334
0
    SDLoc DL(Op);
1335
0
    SDNode *N = Op.getNode();
1336
0
    LoadSDNode *LD = cast<LoadSDNode>(N);
1337
0
    EVT MemVT = LD->getMemoryVT();
1338
0
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1339
0
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1340
0
                                                       : ISD::EXTLOAD)
1341
0
      : LD->getExtensionType();
1342
0
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1343
0
                                   LD->getChain(), LD->getBasePtr(),
1344
0
                                   MemVT, LD->getMemOperand());
1345
0
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1346
0
1347
0
    DEBUG(dbgs() << "\nPromoting ";
1348
0
          N->dump(&DAG);
1349
0
          dbgs() << "\nTo: ";
1350
0
          Result.getNode()->dump(&DAG);
1351
0
          dbgs() << '\n');
1352
0
    WorklistRemover DeadNodes(*this);
1353
0
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1354
0
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1355
0
    deleteAndRecombine(N);
1356
0
    AddToWorklist(Result.getNode());
1357
0
    return true;
1358
0
  }
1359
1.42k
  return false;
1360
1.42k
}
1361
1362
/// \brief Recursively delete a node which has no uses and any operands for
1363
/// which it is the only use.
1364
///
1365
/// Note that this both deletes the nodes and removes them from the worklist.
1366
/// It also adds any nodes that have had a user deleted to the worklist as they
1367
/// may now have only one use and be subject to other combines.
1368
190M
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1369
190M
  if (!N->use_empty())
1370
179M
    return false;
1371
11.8M
1372
11.8M
  SmallSetVector<SDNode *, 16> Nodes;
1373
11.8M
  Nodes.insert(N);
1374
44.9M
  do {
1375
44.9M
    N = Nodes.pop_back_val();
1376
44.9M
    if (!N)
1377
0
      continue;
1378
44.9M
1379
44.9M
    if (N->use_empty()) {
1380
20.1M
      for (const SDValue &ChildN : N->op_values())
1381
33.6M
        Nodes.insert(ChildN.getNode());
1382
20.1M
1383
20.1M
      removeFromWorklist(N);
1384
20.1M
      DAG.DeleteNode(N);
1385
44.9M
    } else {
1386
24.7M
      AddToWorklist(N);
1387
24.7M
    }
1388
44.9M
  } while (!Nodes.empty());
1389
190M
  return true;
1390
190M
}
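recursivelyDeleteUnusedNodes is a plain worklist algorithm: pop a node, delete it if it has become dead and queue its operands, otherwise put it back on the combine worklist. A toy standalone sketch of the same shape, using a hypothetical Node struct with an explicit use count instead of the SDNode use lists:

#include <vector>

struct Node {
  int Uses = 0;                 // users still referencing this node
  std::vector<Node *> Operands; // nodes this one keeps alive
  bool Deleted = false;
};

// Delete N if it is unused, then keep deleting operands that become unused.
static void recursivelyDeleteUnused(Node *N) {
  std::vector<Node *> Worklist{N};
  while (!Worklist.empty()) {
    Node *Cur = Worklist.back();
    Worklist.pop_back();
    if (Cur->Deleted || Cur->Uses != 0)
      continue;                 // still alive (or already gone); leave it
    Cur->Deleted = true;        // stand-in for DAG.DeleteNode
    for (Node *Op : Cur->Operands) {
      --Op->Uses;               // this user is gone
      Worklist.push_back(Op);   // the operand may now be dead as well
    }
  }
}

int main() {
  Node A, B; // B uses A
  A.Uses = 1;
  B.Operands.push_back(&A);
  recursivelyDeleteUnused(&B); // B is dead, and deleting it frees A too
  return (A.Deleted && B.Deleted) ? 0 : 1;
}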
1391
1392
//===----------------------------------------------------------------------===//
1393
//  Main DAG Combiner implementation
1394
//===----------------------------------------------------------------------===//
1395
1396
7.42M
void DAGCombiner::Run(CombineLevel AtLevel) {
1397
7.42M
  // Set the instance variables so that the various visit routines may use them.
1398
7.42M
  Level = AtLevel;
1399
7.42M
  LegalOperations = Level >= AfterLegalizeVectorOps;
1400
7.42M
  LegalTypes = Level >= AfterLegalizeTypes;
1401
7.42M
1402
7.42M
  // Add all the dag nodes to the worklist.
1403
7.42M
  for (SDNode &Node : DAG.allnodes())
1404
170M
    AddToWorklist(&Node);
1405
7.42M
1406
7.42M
  // Create a dummy node (which is not added to allnodes) that adds a reference
1407
7.42M
  // to the root node, preventing it from being deleted, and tracking any
1408
7.42M
  // changes of the root.
1409
7.42M
  HandleSDNode Dummy(DAG.getRoot());
1410
7.42M
1411
7.42M
  // While the worklist isn't empty, find a node and try to combine it.
1412
193M
  while (!WorklistMap.empty()) {
1413
185M
    SDNode *N;
1414
185M
    // The Worklist holds the SDNodes in order, but it may contain null entries.
1415
195M
    do {
1416
195M
      N = Worklist.pop_back_val();
1417
195M
    } while (!N);
1418
185M
1419
185M
    bool GoodWorklistEntry = WorklistMap.erase(N);
1420
185M
    (void)GoodWorklistEntry;
1421
185M
    assert(GoodWorklistEntry &&
1422
185M
           "Found a worklist entry without a corresponding map entry!");
1423
185M
1424
185M
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1425
185M
    // N is deleted from the DAG, since they too may now be dead or may have a
1426
185M
    // reduced number of uses, allowing other xforms.
1427
185M
    if (recursivelyDeleteUnusedNodes(N))
1428
6.86M
      continue;
1429
179M
1430
179M
    WorklistRemover DeadNodes(*this);
1431
179M
1432
179M
    // If this combine is running after legalizing the DAG, re-legalize any
1433
179M
    // nodes pulled off the worklist.
1434
179M
    if (Level == AfterLegalizeDAG) {
1435
72.8M
      SmallSetVector<SDNode *, 16> UpdatedNodes;
1436
72.8M
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1437
72.8M
1438
16.6k
      for (SDNode *LN : UpdatedNodes) {
1439
16.6k
        AddToWorklist(LN);
1440
16.6k
        AddUsersToWorklist(LN);
1441
16.6k
      }
1442
72.8M
      if (!NIsValid)
1443
7.46k
        continue;
1444
179M
    }
1445
179M
1446
179M
    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1447
179M
1448
179M
    // Add any operands of the new node which have not yet been combined to the
1449
179M
    // worklist as well. Because the worklist uniques things already, this
1450
179M
    // won't repeatedly process the same operand.
1451
179M
    CombinedNodes.insert(N);
1452
179M
    for (const SDValue &ChildN : N->op_values())
1453
303M
      if (!CombinedNodes.count(ChildN.getNode()))
1454
276M
        AddToWorklist(ChildN.getNode());
1455
179M
1456
179M
    SDValue RV = combine(N);
1457
179M
1458
179M
    if (!RV.getNode())
1459
172M
      continue;
1460
6.87M
1461
6.87M
    ++NodesCombined;
1462
6.87M
1463
6.87M
    // If we get back the same node we passed in, rather than a new node or
1464
6.87M
    // zero, we know that the node must have defined multiple values and
1465
6.87M
    // CombineTo was used.  Since CombineTo takes care of the worklist
1466
6.87M
    // mechanics for us, we have no work to do in this case.
1467
6.87M
    if (RV.getNode() == N)
1468
1.90M
      continue;
1469
4.96M
1470
6.87M
    assert(N->getOpcode() != ISD::DELETED_NODE &&
1471
4.96M
           RV.getOpcode() != ISD::DELETED_NODE &&
1472
4.96M
           "Node was deleted but visit returned new node!");
1473
4.96M
1474
4.96M
    DEBUG(dbgs() << " ... into: ";
1475
4.96M
          RV.getNode()->dump(&DAG));
1476
4.96M
1477
4.96M
    if (N->getNumValues() == RV.getNode()->getNumValues())
1478
4.89M
      DAG.ReplaceAllUsesWith(N, RV.getNode());
1479
64.2k
    else {
1480
64.2k
      assert(N->getValueType(0) == RV.getValueType() &&
1481
64.2k
             N->getNumValues() == 1 && "Type mismatch");
1482
64.2k
      DAG.ReplaceAllUsesWith(N, &RV);
1483
64.2k
    }
1484
185M
1485
185M
    // Push the new node and any users onto the worklist
1486
185M
    AddToWorklist(RV.getNode());
1487
185M
    AddUsersToWorklist(RV.getNode());
1488
185M
1489
185M
    // Finally, if the node is now dead, remove it from the graph.  The node
1490
185M
    // may not be dead if the replacement process recursively simplified to
1491
185M
    // something else needing this node. This will also take care of adding any
1492
185M
    // operands which have lost a user to the worklist.
1493
185M
    recursivelyDeleteUnusedNodes(N);
1494
185M
  }
1495
7.42M
1496
7.42M
  // If the root changed (e.g. it was a dead load), update the root.
1497
7.42M
  DAG.setRoot(Dummy.getValue());
1498
7.42M
  DAG.RemoveDeadNodes();
1499
7.42M
}
1500
1501
179M
SDValue DAGCombiner::visit(SDNode *N) {
1502
179M
  switch (N->getOpcode()) {
1503
133M
  default: break;
1504
5.93M
  case ISD::TokenFactor:        return visitTokenFactor(N);
1505
54.0k
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1506
9.28M
  case ISD::ADD:                return visitADD(N);
1507
419k
  case ISD::SUB:                return visitSUB(N);
1508
302
  case ISD::ADDC:               return visitADDC(N);
1509
4.95k
  case ISD::UADDO:              return visitUADDO(N);
1510
1.18k
  case ISD::SUBC:               return visitSUBC(N);
1511
2.20k
  case ISD::USUBO:              return visitUSUBO(N);
1512
333
  case ISD::ADDE:               return visitADDE(N);
1513
15.9k
  case ISD::ADDCARRY:           return visitADDCARRY(N);
1514
145
  case ISD::SUBE:               return visitSUBE(N);
1515
972
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
1516
478k
  case ISD::MUL:                return visitMUL(N);
1517
18.9k
  case ISD::SDIV:               return visitSDIV(N);
1518
22.7k
  case ISD::UDIV:               return visitUDIV(N);
1519
8.48k
  case ISD::SREM:
1520
8.48k
  case ISD::UREM:               return visitREM(N);
1521
15.8k
  case ISD::MULHU:              return visitMULHU(N);
1522
1.87k
  case ISD::MULHS:              return visitMULHS(N);
1523
647
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1524
2.96k
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1525
82
  case ISD::SMULO:              return visitSMULO(N);
1526
2.49k
  case ISD::UMULO:              return visitUMULO(N);
1527
12.6k
  case ISD::SMIN:
1528
12.6k
  case ISD::SMAX:
1529
12.6k
  case ISD::UMIN:
1530
12.6k
  case ISD::UMAX:               return visitIMINMAX(N);
1531
1.11M
  case ISD::AND:                return visitAND(N);
1532
255k
  case ISD::OR:                 return visitOR(N);
1533
874k
  case ISD::XOR:                return visitXOR(N);
1534
740k
  case ISD::SHL:                return visitSHL(N);
1535
86.4k
  case ISD::SRA:                return visitSRA(N);
1536
303k
  case ISD::SRL:                return visitSRL(N);
1537
5.68k
  case ISD::ROTR:
1538
5.68k
  case ISD::ROTL:               return visitRotate(N);
1539
1.11k
  case ISD::ABS:                return visitABS(N);
1540
2.66k
  case ISD::BSWAP:              return visitBSWAP(N);
1541
634
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
1542
5.57k
  case ISD::CTLZ:               return visitCTLZ(N);
1543
4.14k
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1544
728
  case ISD::CTTZ:               return visitCTTZ(N);
1545
1.00k
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1546
1.18k
  case ISD::CTPOP:              return visitCTPOP(N);
1547
177k
  case ISD::SELECT:             return visitSELECT(N);
1548
30.3k
  case ISD::VSELECT:            return visitVSELECT(N);
1549
195k
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
1550
759k
  case ISD::SETCC:              return visitSETCC(N);
1551
1.05k
  case ISD::SETCCE:             return visitSETCCE(N);
1552
648
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1553
518k
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1554
388k
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1555
168k
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1556
510k
  case ISD::AssertZext:         return visitAssertZext(N);
1557
99.7k
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1558
3.09k
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1559
3.91k
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1560
849k
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
1561
394k
  case ISD::BITCAST:            return visitBITCAST(N);
1562
25.6k
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1563
142k
  case ISD::FADD:               return visitFADD(N);
1564
29.4k
  case ISD::FSUB:               return visitFSUB(N);
1565
123k
  case ISD::FMUL:               return visitFMUL(N);
1566
6.34k
  case ISD::FMA:                return visitFMA(N);
1567
121k
  case ISD::FDIV:               return visitFDIV(N);
1568
264
  case ISD::FREM:               return visitFREM(N);
1569
2.78k
  case ISD::FSQRT:              return visitFSQRT(N);
1570
3.42k
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1571
127k
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1572
92.9k
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1573
12.2k
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1574
17.0k
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1575
23.5k
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
1576
0
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1577
68.2k
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1578
12.7k
  case ISD::FNEG:               return visitFNEG(N);
1579
8.32k
  case ISD::FABS:               return visitFABS(N);
1580
1.53k
  case ISD::FFLOOR:             return visitFFLOOR(N);
1581
2.84k
  case ISD::FMINNUM:            return visitFMINNUM(N);
1582
2.59k
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
1583
970
  case ISD::FCEIL:              return visitFCEIL(N);
1584
1.13k
  case ISD::FTRUNC:             return visitFTRUNC(N);
1585
2.86M
  case ISD::BRCOND:             return visitBRCOND(N);
1586
2.09M
  case ISD::BR_CC:              return visitBR_CC(N);
1587
6.04M
  case ISD::LOAD:               return visitLOAD(N);
1588
8.90M
  case ISD::STORE:              return visitSTORE(N);
1589
60.5k
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1590
349k
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1591
366k
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1592
31.3k
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1593
248k
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1594
53.1k
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1595
35.7k
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1596
38.7k
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1597
642
  case ISD::MGATHER:            return visitMGATHER(N);
1598
725
  case ISD::MLOAD:              return visitMLOAD(N);
1599
282
  case ISD::MSCATTER:           return visitMSCATTER(N);
1600
369
  case ISD::MSTORE:             return visitMSTORE(N);
1601
3.38k
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1602
4.21k
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1603
133M
  }
1604
133M
  return SDValue();
1605
133M
}
1606
1607
179M
SDValue DAGCombiner::combine(SDNode *N) {
1608
179M
  SDValue RV = visit(N);
1609
179M
1610
179M
  // If nothing happened, try a target-specific DAG combine.
1611
179M
  if (!RV.getNode()) {
1612
172M
    assert(N->getOpcode() != ISD::DELETED_NODE &&
1613
172M
           "Node was deleted but visit returned NULL!");
1614
172M
1615
172M
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1616
172M
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1617
32.3M
1618
32.3M
      // Expose the DAG combiner to the target combiner impls.
1619
32.3M
      TargetLowering::DAGCombinerInfo
1620
32.3M
        DagCombineInfo(DAG, Level, false, this);
1621
32.3M
1622
32.3M
      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1623
32.3M
    }
1624
172M
  }
1625
179M
1626
179M
  // If nothing happened still, try promoting the operation.
1627
179M
  if (!RV.getNode()) {
1628
172M
    switch (N->getOpcode()) {
1629
154M
    default: break;
1630
10.3M
    case ISD::ADD:
1631
10.3M
    case ISD::SUB:
1632
10.3M
    case ISD::MUL:
1633
10.3M
    case ISD::AND:
1634
10.3M
    case ISD::OR:
1635
10.3M
    case ISD::XOR:
1636
10.3M
      RV = PromoteIntBinOp(SDValue(N, 0));
1637
10.3M
      break;
1638
1.05M
    case ISD::SHL:
1639
1.05M
    case ISD::SRA:
1640
1.05M
    case ISD::SRL:
1641
1.05M
      RV = PromoteIntShiftOp(SDValue(N, 0));
1642
1.05M
      break;
1643
787k
    case ISD::SIGN_EXTEND:
1644
787k
    case ISD::ZERO_EXTEND:
1645
787k
    case ISD::ANY_EXTEND:
1646
787k
      RV = PromoteExtend(SDValue(N, 0));
1647
787k
      break;
1648
5.67M
    case ISD::LOAD:
1649
5.67M
      if (PromoteLoad(SDValue(N, 0)))
1650
0
        RV = SDValue(N, 0);
1651
10.3M
      break;
1652
179M
    }
1653
179M
  }
1654
179M
1655
179M
  // If N is a commutative binary node, try eliminate it if the commuted
1656
179M
  // version is already present in the DAG.
1657
179M
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1658
179M
      N->getNumValues() == 1) {
1659
10.2M
    SDValue N0 = N->getOperand(0);
1660
10.2M
    SDValue N1 = N->getOperand(1);
1661
10.2M
1662
10.2M
    // Constant operands are canonicalized to RHS.
1663
10.2M
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1664
2.37M
      SDValue Ops[] = {N1, N0};
1665
2.37M
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1666
2.37M
                                            N->getFlags());
1667
2.37M
      if (CSENode)
1668
378
        return SDValue(CSENode, 0);
1669
179M
    }
1670
10.2M
  }
1671
179M
1672
179M
  return RV;
1673
179M
}
1674
1675
/// Given a node, return its input chain if it has one, otherwise return a null
1676
/// sd operand.
1677
7.43M
static SDValue getInputChainForNode(SDNode *N) {
1678
7.43M
  if (unsigned NumOps = N->getNumOperands()) {
1679
7.43M
    if (N->getOperand(0).getValueType() == MVT::Other)
1680
7.41M
      return N->getOperand(0);
1681
24.2k
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1682
24.2k
      return N->getOperand(NumOps-1);
1683
0
    for (unsigned i = 1; i < NumOps-1; ++i)
1684
0
      if (N->getOperand(i).getValueType() == MVT::Other)
1685
0
        return N->getOperand(i);
1686
7.43M
  }
1687
1.16k
  return SDValue();
1688
7.43M
}
1689
1690
5.93M
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1691
5.93M
  // If N has two operands, where one has an input chain equal to the other,
1692
5.93M
  // the 'other' chain is redundant.
1693
5.93M
  if (N->getNumOperands() == 2) {
1694
3.74M
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1695
55.7k
      return N->getOperand(0);
1696
3.69M
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1697
39.2k
      return N->getOperand(1);
1698
5.83M
  }
1699
5.83M
1700
5.83M
  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1701
5.83M
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1702
5.83M
  SmallPtrSet<SDNode*, 16> SeenOps;
1703
5.83M
  bool Changed = false;             // If we should replace this token factor.
1704
5.83M
1705
5.83M
  // Start out with this token factor.
1706
5.83M
  TFs.push_back(N);
1707
5.83M
1708
5.83M
  // Iterate through token factors.  The TFs grows when new token factors are
1709
5.83M
  // encountered.
1710
12.5M
  for (unsigned i = 0; i < TFs.size(); ++i) {
1711
6.69M
    SDNode *TF = TFs[i];
1712
6.69M
1713
6.69M
    // Check each of the operands.
1714
24.6M
    for (const SDValue &Op : TF->op_values()) {
1715
24.6M
      switch (Op.getOpcode()) {
1716
6.10k
      case ISD::EntryToken:
1717
6.10k
        // Entry tokens don't need to be added to the list. They are
1718
6.10k
        // redundant.
1719
6.10k
        Changed = true;
1720
6.10k
        break;
1721
24.6M
1722
1.16M
      case ISD::TokenFactor:
1723
1.16M
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1724
852k
          // Queue up for processing.
1725
852k
          TFs.push_back(Op.getNode());
1726
852k
          // Clean up in case the token factor is removed.
1727
852k
          AddToWorklist(Op.getNode());
1728
852k
          Changed = true;
1729
852k
          break;
1730
852k
        }
1731
315k
        LLVM_FALLTHROUGH;
1732
315k
1733
23.7M
      default:
1734
23.7M
        // Only add if it isn't already in the list.
1735
23.7M
        if (SeenOps.insert(Op.getNode()).second)
1736
23.6M
          Ops.push_back(Op);
1737
23.7M
        else
1738
104k
          Changed = true;
1739
1.16M
        break;
1740
24.6M
      }
1741
24.6M
    }
1742
6.69M
  }
1743
5.83M
1744
5.83M
  // Remove nodes that are chained to another node in the list. Do so
1745
5.83M
  // by walking up chains breadth-first, stopping when we've seen
1746
5.83M
  // another operand. In general we must climb to the EntryNode, but we can exit
1747
5.83M
  // early if we find all remaining work is associated with just one operand as
1748
5.83M
  // no further pruning is possible.
1749
5.83M
1750
5.83M
  // List of nodes to search through and original Ops from which they originate.
1751
5.83M
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1752
5.83M
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1753
5.83M
  SmallPtrSet<SDNode *, 16> SeenChains;
1754
5.83M
  bool DidPruneOps = false;
1755
5.83M
1756
5.83M
  unsigned NumLeftToConsider = 0;
1757
23.6M
  for (const SDValue &Op : Ops) {
1758
23.6M
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1759
23.6M
    OpWorkCount.push_back(1);
1760
23.6M
  }
1761
5.83M
1762
32.7M
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1763
32.7M
    // If this is an Op, we can remove the op from the list. Re-mark any
1764
32.7M
    // search associated with it as from the current OpNumber.
1765
32.7M
    if (SeenOps.count(Op) != 0) {
1766
513k
      Changed = true;
1767
513k
      DidPruneOps = true;
1768
513k
      unsigned OrigOpNumber = 0;
1769
28.1M
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1770
27.6M
        OrigOpNumber++;
1771
513k
      assert((OrigOpNumber != Ops.size()) &&
1772
513k
             "expected to find TokenFactor Operand");
1773
513k
      // Re-mark worklist from OrigOpNumber to OpNumber
1774
22.6M
      for (unsigned i = CurIdx + 1; 
i < Worklist.size()22.6M
;
++i22.1M
) {
1775
22.1M
        if (Worklist[i].second == OrigOpNumber) {
1776
206k
          Worklist[i].second = OpNumber;
1777
206k
        }
1778
22.1M
      }
1779
513k
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1780
513k
      OpWorkCount[OrigOpNumber] = 0;
1781
513k
      NumLeftToConsider--;
1782
513k
    }
1783
32.7M
    // Add if it's a new chain
1784
32.7M
    if (SeenChains.insert(Op).second) {
1785
14.0M
      OpWorkCount[OpNumber]++;
1786
14.0M
      Worklist.push_back(std::make_pair(Op, OpNumber));
1787
14.0M
    }
1788
32.7M
  };
1789
5.83M
1790
38.6M
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1791
36.7M
    // We need to consider at least 2 Ops to prune.
1792
36.7M
    if (NumLeftToConsider <= 1)
1793
4.00M
      break;
1794
32.7M
    auto CurNode = Worklist[i].first;
1795
32.7M
    auto CurOpNumber = Worklist[i].second;
1796
32.7M
    assert((OpWorkCount[CurOpNumber] > 0) &&
1797
32.7M
           "Node should not appear in worklist");
1798
32.7M
    switch (CurNode->getOpcode()) {
1799
1.84M
    case ISD::EntryToken:
1800
1.84M
      // Hitting EntryToken is the only way for the search to terminate without
1801
1.84M
      // hitting
1802
1.84M
      // another operand's search. Prevent us from marking this operand
1803
1.84M
      // considered.
1804
1.84M
      NumLeftToConsider++;
1805
1.84M
      break;
1806
1.26M
    case ISD::TokenFactor:
1807
1.26M
      for (const SDValue &Op : CurNode->op_values())
1808
4.95M
        AddToWorklist(i, Op.getNode(), CurOpNumber);
1809
1.26M
      break;
1810
7.37M
    case ISD::CopyFromReg:
1811
7.37M
    case ISD::CopyToReg:
1812
7.37M
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1813
7.37M
      break;
1814
22.2M
    default:
1815
22.2M
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1816
20.4M
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1817
7.37M
      break;
1818
32.7M
    }
1819
32.7M
    OpWorkCount[CurOpNumber]--;
1820
32.7M
    if (OpWorkCount[CurOpNumber] == 0)
1821
19.3M
      NumLeftToConsider--;
1822
36.7M
  }
1823
5.83M
1824
5.83M
  // If we've changed things around then replace token factor.
1825
5.83M
  if (Changed) {
1826
822k
    SDValue Result;
1827
822k
    if (Ops.empty()) {
1828
315
      // The entry token is the only possible outcome.
1829
315
      Result = DAG.getEntryNode();
1830
822k
    } else {
1831
822k
      if (DidPruneOps) {
1832
173k
        SmallVector<SDValue, 8> PrunedOps;
1833
173k
        // Drop operands that were reached while walking another operand's chain.
1834
1.12M
        for (const SDValue &Op : Ops) {
1835
1.12M
          if (SeenChains.count(Op.getNode()) == 0)
1836
867k
            PrunedOps.push_back(Op);
1837
1.12M
        }
1838
173k
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1839
822k
      } else {
1840
649k
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1841
649k
      }
1842
822k
    }
1843
822k
    return Result;
1844
822k
  }
1845
5.01M
  return SDValue();
1846
5.01M
}
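Most of visitTokenFactor is the pruning walk described in the comments: an operand is redundant if it can be reached by climbing chains from another operand, because the surviving operand already orders it. A much-simplified standalone sketch of that reachability test, using a toy ChainNode type (the real code additionally caps the walk at 1024 steps and tracks per-operand work counts):

#include <queue>
#include <set>
#include <vector>

struct ChainNode {
  std::vector<ChainNode *> ChainPreds; // nodes this one is chained after
};

// Keep only the operands that are not reachable by walking chains upward
// from some other operand; the reachable ones are already implied.
static std::vector<ChainNode *>
pruneTokenFactorOps(const std::vector<ChainNode *> &Ops) {
  std::set<ChainNode *> OpSet(Ops.begin(), Ops.end());
  std::set<ChainNode *> Redundant;
  for (ChainNode *Start : Ops) {
    std::queue<ChainNode *> Work;
    std::set<ChainNode *> Seen;
    Work.push(Start);
    while (!Work.empty()) {
      ChainNode *Cur = Work.front();
      Work.pop();
      for (ChainNode *Pred : Cur->ChainPreds) {
        if (!Seen.insert(Pred).second)
          continue;
        if (Pred != Start && OpSet.count(Pred))
          Redundant.insert(Pred); // another operand already orders this one
        Work.push(Pred);
      }
    }
  }
  std::vector<ChainNode *> Pruned;
  for (ChainNode *Op : Ops)
    if (!Redundant.count(Op))
      Pruned.push_back(Op);
  return Pruned;
}

int main() {
  ChainNode A, B; // B is chained after A, so A is a redundant operand
  B.ChainPreds.push_back(&A);
  std::vector<ChainNode *> Ops{&A, &B};
  return pruneTokenFactorOps(Ops).size() == 1 ? 0 : 1;
}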
1847
1848
/// MERGE_VALUES can always be eliminated.
1849
54.0k
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1850
54.0k
  WorklistRemover DeadNodes(*this);
1851
54.0k
  // Replacing results may cause a different MERGE_VALUES to suddenly
1852
54.0k
  // be CSE'd with N, and carry its uses with it. Iterate until no
1853
54.0k
  // uses remain, to ensure that the node can be safely deleted.
1854
54.0k
  // First add the users of this node to the work list so that they
1855
54.0k
  // can be tried again once they have new operands.
1856
54.0k
  AddUsersToWorklist(N);
1857
54.0k
  do {
1858
166k
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1859
112k
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1860
54.0k
  } while (!N->use_empty());
1861
54.0k
  deleteAndRecombine(N);
1862
54.0k
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1863
54.0k
}
1864
1865
/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1866
/// ConstantSDNode pointer else nullptr.
1867
4.74M
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1868
4.74M
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1869
4.74M
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1870
4.74M
}
1871
1872
14.0M
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1873
14.0M
  auto BinOpcode = BO->getOpcode();
1874
14.0M
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1875
14.0M
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1876
14.0M
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1877
14.0M
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1878
14.0M
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1879
14.0M
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1880
14.0M
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1881
14.0M
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1882
14.0M
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1883
14.0M
         "Unexpected binary operator");
1884
14.0M
1885
14.0M
  // Bail out if any constants are opaque because we can't constant fold those.
1886
14.0M
  SDValue C1 = BO->getOperand(1);
1887
14.0M
  if (!isConstantOrConstantVector(C1, true) &&
1888
3.16M
      !isConstantFPBuildVectorOrConstantFP(C1))
1889
3.06M
    return SDValue();
1890
10.9M
1891
10.9M
  // Don't do this unless the old select is going away. We want to eliminate the
1892
10.9M
  // binary operator, not replace a binop with a select.
1893
10.9M
  // TODO: Handle ISD::SELECT_CC.
1894
10.9M
  SDValue Sel = BO->getOperand(0);
1895
10.9M
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1896
10.9M
    return SDValue();
1897
5.75k
1898
5.75k
  SDValue CT = Sel.getOperand(1);
1899
5.75k
  if (!isConstantOrConstantVector(CT, true) &&
1900
5.43k
      !isConstantFPBuildVectorOrConstantFP(CT))
1901
5.42k
    return SDValue();
1902
332
1903
332
  SDValue CF = Sel.getOperand(2);
1904
332
  if (!isConstantOrConstantVector(CF, true) &&
1905
129
      !isConstantFPBuildVectorOrConstantFP(CF))
1906
115
    return SDValue();
1907
217
1908
217
  // We have a select-of-constants followed by a binary operator with a
1909
217
  // constant. Eliminate the binop by pulling the constant math into the select.
1910
217
  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1911
217
  EVT VT = Sel.getValueType();
1912
217
  SDLoc DL(Sel);
1913
217
  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1914
217
  assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1915
217
          isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1916
217
         "Failed to constant fold a binop with constant operands");
1917
217
1918
217
  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1919
217
  assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1920
217
          isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1921
217
         "Failed to constant fold a binop with constant operands");
1922
217
1923
217
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1924
217
}
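The example in the comment ("add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1") is just distributing the constant operand into both arms of the select. A small standalone check of that arithmetic with arbitrary example constants:

#include <cassert>

int main() {
  const int CT = 3, CF = 5, C1 = 7;
  for (bool Cond : {false, true}) {
    int before = (Cond ? CT : CF) + C1;        // add (select Cond, CT, CF), C1
    int after  = Cond ? (CT + C1) : (CF + C1); // select Cond, CT+C1, CF+C1
    assert(before == after);
  }
  return 0;
}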
1925
1926
9.28M
SDValue DAGCombiner::visitADD(SDNode *N) {
1927
9.28M
  SDValue N0 = N->getOperand(0);
1928
9.28M
  SDValue N1 = N->getOperand(1);
1929
9.28M
  EVT VT = N0.getValueType();
1930
9.28M
  SDLoc DL(N);
1931
9.28M
1932
9.28M
  // fold vector ops
1933
9.28M
  if (VT.isVector()) {
1934
93.4k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
1935
18
      return FoldedVOp;
1936
93.4k
1937
93.4k
    // fold (add x, 0) -> x, vector edition
1938
93.4k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
1939
76
      return N0;
1940
93.3k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
1941
87
      return N1;
1942
9.28M
  }
1943
9.28M
1944
9.28M
  // fold (add x, undef) -> undef
1945
9.28M
  if (N0.isUndef())
1946
3
    return N0;
1947
9.28M
1948
9.28M
  if (N1.isUndef())
1949
4
    return N1;
1950
9.28M
1951
9.28M
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1952
2.28k
    // canonicalize constant to RHS
1953
2.28k
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1954
959
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1955
1.32k
    // fold (add c1, c2) -> c1+c2
1956
1.32k
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1957
1.32k
                                      N1.getNode());
1958
1.32k
  }
1959
9.27M
1960
9.27M
  // fold (add x, 0) -> x
1961
9.27M
  if (isNullConstant(N1))
1962
30
    return N0;
1963
9.27M
1964
9.27M
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1965
7.67M
    // fold ((c1-A)+c2) -> (c1+c2)-A
1966
7.67M
    if (N0.getOpcode() == ISD::SUB &&
1967
7.67M
        
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)5.95k
) {
1968
566
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1969
566
      return DAG.getNode(ISD::SUB, DL, VT,
1970
566
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1971
566
                         N0.getOperand(1));
1972
566
    }
1973
7.67M
1974
7.67M
    // add (sext i1 X), 1 -> zext (not i1 X)
1975
7.67M
    // We don't transform this pattern:
1976
7.67M
    //   add (zext i1 X), -1 -> sext (not i1 X)
1977
7.67M
    // because most (?) targets generate better code for the zext form.
1978
7.67M
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1979
7.67M
        isOneConstantOrOneSplatConstant(N1)) {
1980
1.74k
      SDValue X = N0.getOperand(0);
1981
1.74k
      if ((!LegalOperations ||
1982
366
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1983
366
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1984
1.74k
          
X.getScalarValueSizeInBits() == 11.39k
) {
1985
17
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1986
17
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1987
17
      }
1988
7.67M
    }
1989
7.67M
1990
7.67M
    // Undo the add -> or combine to merge constant offsets from a frame index.
1991
7.67M
    
if (7.67M
N0.getOpcode() == ISD::OR &&
1992
13.4k
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
1993
2.84k
        isa<ConstantSDNode>(N0.getOperand(1)) &&
1994
7.67M
        
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))2.84k
) {
1995
2.84k
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
1996
2.84k
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
1997
2.84k
    }
1998
9.27M
  }
1999
9.27M
2000
9.27M
  
if (SDValue 9.27M
NewSel9.27M
= foldBinOpIntoSelect(N))
2001
8
    return NewSel;
2002
9.27M
2003
9.27M
  // reassociate add
2004
9.27M
  
if (SDValue 9.27M
RADD9.27M
= ReassociateOps(ISD::ADD, DL, N0, N1))
2005
583k
    return RADD;
2006
8.69M
2007
8.69M
  // fold ((0-A) + B) -> B-A
2008
8.69M
  
if (8.69M
N0.getOpcode() == ISD::SUB &&
2009
24.1k
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2010
271
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2011
8.69M
2012
8.69M
  // fold (A + (0-B)) -> A-B
2013
8.69M
  
if (8.69M
N1.getOpcode() == ISD::SUB &&
2014
14.6k
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2015
4.61k
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2016
8.68M
2017
8.68M
  // fold (A+(B-A)) -> B
2018
8.68M
  
if (8.68M
N1.getOpcode() == ISD::SUB && 8.68M
N0 == N1.getOperand(1)10.0k
)
2019
6
    return N1.getOperand(0);
2020
8.68M
2021
8.68M
  // fold ((B-A)+A) -> B
2022
8.68M
  
if (8.68M
N0.getOpcode() == ISD::SUB && 8.68M
N1 == N0.getOperand(1)23.8k
)
2023
10
    return N0.getOperand(0);
2024
8.68M
2025
8.68M
  // fold (A+(B-(A+C))) to (B-C)
2026
8.68M
  
if (8.68M
N1.getOpcode() == ISD::SUB && 8.68M
N1.getOperand(1).getOpcode() == ISD::ADD10.0k
&&
2027
527
      N0 == N1.getOperand(1).getOperand(0))
2028
2
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2029
2
                       N1.getOperand(1).getOperand(1));
2030
8.68M
2031
8.68M
  // fold (A+(B-(C+A))) to (B-C)
2032
8.68M
  
if (8.68M
N1.getOpcode() == ISD::SUB && 8.68M
N1.getOperand(1).getOpcode() == ISD::ADD10.0k
&&
2033
525
      N0 == N1.getOperand(1).getOperand(1))
2034
3
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2035
3
                       N1.getOperand(1).getOperand(0));
2036
8.68M
2037
8.68M
  // fold (A+((B-A)+or-C)) to (B+or-C)
2038
8.68M
  
if (8.68M
(N1.getOpcode() == ISD::SUB || 8.68M
N1.getOpcode() == ISD::ADD8.67M
) &&
2039
47.7k
      N1.getOperand(0).getOpcode() == ISD::SUB &&
2040
1.24k
      N0 == N1.getOperand(0).getOperand(1))
2041
7
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2042
7
                       N1.getOperand(1));
2043
8.68M
2044
8.68M
  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2045
8.68M
  
if (8.68M
N0.getOpcode() == ISD::SUB && 8.68M
N1.getOpcode() == ISD::SUB23.8k
) {
2046
1.09k
    SDValue N00 = N0.getOperand(0);
2047
1.09k
    SDValue N01 = N0.getOperand(1);
2048
1.09k
    SDValue N10 = N1.getOperand(0);
2049
1.09k
    SDValue N11 = N1.getOperand(1);
2050
1.09k
2051
1.09k
    if (
isConstantOrConstantVector(N00) || 1.09k
isConstantOrConstantVector(N10)1.08k
)
2052
17
      return DAG.getNode(ISD::SUB, DL, VT,
2053
17
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2054
17
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2055
8.68M
  }
2056
8.68M
2057
8.68M
  
if (8.68M
SimplifyDemandedBits(SDValue(N, 0))8.68M
)
2058
8.28k
    return SDValue(N, 0);
2059
8.67M
2060
8.67M
  // fold (a+b) -> (a|b) iff a and b share no bits.
2061
8.67M
  
if (8.67M
(!LegalOperations || 8.67M
TLI.isOperationLegal(ISD::OR, VT)3.73M
) &&
2062
8.62M
      DAG.haveNoCommonBitsSet(N0, N1))
2063
20.2k
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2064
8.65M
2065
8.65M
  
if (SDValue 8.65M
Combined8.65M
= visitADDLike(N0, N1, N))
2066
4.08k
    return Combined;
2067
8.65M
2068
8.65M
  
if (SDValue 8.65M
Combined8.65M
= visitADDLike(N1, N0, N))
2069
220
    return Combined;
2070
8.65M
2071
8.65M
  return SDValue();
2072
8.65M
}
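The "(a+b) -> (a|b) iff a and b share no bits" fold near the end of visitADD works because disjoint set bits can never generate a carry. A quick standalone check of that identity on a pair of example masks:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xF0F0u, b = 0x0F0Fu; // disjoint bit masks
  assert((a & b) == 0);              // haveNoCommonBitsSet analogue
  assert((a + b) == (a | b));        // no carries, so add and or agree
  return 0;
}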
2073
2074
729k
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2075
729k
  bool Masked = false;
2076
729k
2077
729k
  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2078
735k
  while (
true735k
) {
2079
735k
    if (
V.getOpcode() == ISD::TRUNCATE || 735k
V.getOpcode() == ISD::ZERO_EXTEND732k
) {
2080
5.63k
      V = V.getOperand(0);
2081
5.63k
      continue;
2082
5.63k
    }
2083
730k
2084
730k
    
if (730k
V.getOpcode() == ISD::AND && 730k
isOneConstant(V.getOperand(1))7.21k
) {
2085
1.14k
      Masked = true;
2086
1.14k
      V = V.getOperand(0);
2087
1.14k
      continue;
2088
1.14k
    }
2089
729k
2090
729k
    break;
2091
729k
  }
2092
729k
2093
729k
  // If this is not a carry, return.
2094
729k
  if (V.getResNo() != 1)
2095
726k
    return SDValue();
2096
2.99k
2097
2.99k
  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2098
2.99k
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2099
2.51k
    return SDValue();
2100
478
2101
478
  // If the result is masked, then no matter what kind of bool it is we can
2102
478
  // return. If it isn't, then we need to make sure the bool type is either 0 or
2103
478
  // 1 and not other values.
2104
478
  
if (478
Masked ||
2105
242
      TLI.getBooleanContents(V.getValueType()) ==
2106
242
          TargetLoweringBase::ZeroOrOneBooleanContent)
2107
478
    return V;
2108
0
2109
0
  return SDValue();
2110
0
}
2111
2112
17.3M
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2113
17.3M
  EVT VT = N0.getValueType();
2114
17.3M
  SDLoc DL(LocReference);
2115
17.3M
2116
17.3M
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2117
17.3M
  if (
N1.getOpcode() == ISD::SHL && 17.3M
N1.getOperand(0).getOpcode() == ISD::SUB549k
&&
2118
4.56k
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2119
359
    return DAG.getNode(ISD::SUB, DL, VT, N0,
2120
359
                       DAG.getNode(ISD::SHL, DL, VT,
2121
359
                                   N1.getOperand(0).getOperand(1),
2122
359
                                   N1.getOperand(1)));
2123
17.3M
2124
17.3M
  
if (17.3M
N1.getOpcode() == ISD::AND17.3M
) {
2125
37.1k
    SDValue AndOp0 = N1.getOperand(0);
2126
37.1k
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2127
37.1k
    unsigned DestBits = VT.getScalarSizeInBits();
2128
37.1k
2129
37.1k
    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2130
37.1k
    // and similar xforms where the inner op is either ~0 or 0.
2131
37.1k
    if (NumSignBits == DestBits &&
2132
677
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2133
240
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2134
17.3M
  }
2135
17.3M
2136
17.3M
  // add (sext i1), X -> sub X, (zext i1)
2137
17.3M
  
if (17.3M
N0.getOpcode() == ISD::SIGN_EXTEND &&
2138
64.8k
      N0.getOperand(0).getValueType() == MVT::i1 &&
2139
17.3M
      
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)3.09k
) {
2140
3.06k
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2141
3.06k
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2142
3.06k
  }
2143
17.3M
2144
17.3M
  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2145
17.3M
  
if (17.3M
N1.getOpcode() == ISD::SIGN_EXTEND_INREG17.3M
) {
2146
3.92k
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2147
3.92k
    if (
TN->getVT() == MVT::i13.92k
) {
2148
627
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2149
627
                                 DAG.getConstant(1, DL, VT));
2150
627
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2151
627
    }
2152
17.3M
  }
2153
17.3M
2154
17.3M
  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2155
17.3M
  
if (17.3M
N1.getOpcode() == ISD::ADDCARRY && 17.3M
isNullConstant(N1.getOperand(1))19
)
2156
12
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2157
12
                       N0, N1.getOperand(0), N1.getOperand(2));
2158
17.3M
2159
17.3M
  // (add X, Carry) -> (addcarry X, 0, Carry)
2160
17.3M
  
if (17.3M
TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)17.3M
)
2161
692k
    
if (SDValue 692k
Carry692k
= getAsCarry(TLI, N1))
2162
4
      return DAG.getNode(ISD::ADDCARRY, DL,
2163
4
                         DAG.getVTList(VT, Carry.getValueType()), N0,
2164
4
                         DAG.getConstant(0, DL, VT), Carry);
2165
17.3M
2166
17.3M
  return SDValue();
2167
17.3M
}
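Both i1 folds above ("add (sext i1), X -> sub X, (zext i1)" and "add X, (sextinreg Y i1) -> sub X, (and Y 1)") rest on the fact that a sign-extended one-bit value is 0 or -1, so adding it equals subtracting the corresponding 0/1 bit. A standalone check, with int standing in for the wider type:

#include <cassert>

int main() {
  const int X = 42;
  for (int b : {0, 1}) {            // the i1 value
    int sextB = b ? -1 : 0;         // sign-extended i1
    int zextB = b;                  // zero-extended i1 (also "and Y, 1")
    assert(X + sextB == X - zextB); // add (sext i1), X == sub X, (zext i1)
  }
  return 0;
}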
2168
2169
302
SDValue DAGCombiner::visitADDC(SDNode *N) {
2170
302
  SDValue N0 = N->getOperand(0);
2171
302
  SDValue N1 = N->getOperand(1);
2172
302
  EVT VT = N0.getValueType();
2173
302
  SDLoc DL(N);
2174
302
2175
302
  // If the flag result is dead, turn this into an ADD.
2176
302
  if (!N->hasAnyUseOfValue(1))
2177
5
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2178
5
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2179
297
2180
297
  // canonicalize constant to RHS.
2181
297
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2182
297
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2183
297
  if (
N0C && 297
!N1C0
)
2184
0
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2185
297
2186
297
  // fold (addc x, 0) -> x + no carry out
2187
297
  
if (297
isNullConstant(N1)297
)
2188
0
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2189
0
                                        DL, MVT::Glue));
2190
297
2191
297
  // If it cannot overflow, transform into an add.
2192
297
  
if (297
DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never297
)
2193
2
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2194
2
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2195
295
2196
295
  return SDValue();
2197
295
}
2198
2199
4.95k
SDValue DAGCombiner::visitUADDO(SDNode *N) {
2200
4.95k
  SDValue N0 = N->getOperand(0);
2201
4.95k
  SDValue N1 = N->getOperand(1);
2202
4.95k
  EVT VT = N0.getValueType();
2203
4.95k
  if (VT.isVector())
2204
0
    return SDValue();
2205
4.95k
2206
4.95k
  EVT CarryVT = N->getValueType(1);
2207
4.95k
  SDLoc DL(N);
2208
4.95k
2209
4.95k
  // If the flag result is dead, turn this into an ADD.
2210
4.95k
  if (!N->hasAnyUseOfValue(1))
2211
634
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2212
634
                     DAG.getUNDEF(CarryVT));
2213
4.31k
2214
4.31k
  // canonicalize constant to RHS.
2215
4.31k
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2216
4.31k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2217
4.31k
  if (
N0C && 4.31k
!N1C106
)
2218
2
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2219
4.31k
2220
4.31k
  // fold (uaddo x, 0) -> x + no carry out
2221
4.31k
  
if (4.31k
isNullConstant(N1)4.31k
)
2222
368
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2223
3.94k
2224
3.94k
  // If it cannot overflow, transform into an add.
2225
3.94k
  
if (3.94k
DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never3.94k
)
2226
87
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2227
87
                     DAG.getConstant(0, DL, CarryVT));
2228
3.86k
2229
3.86k
  
if (SDValue 3.86k
Combined3.86k
= visitUADDOLike(N0, N1, N))
2230
69
    return Combined;
2231
3.79k
2232
3.79k
  
if (SDValue 3.79k
Combined3.79k
= visitUADDOLike(N1, N0, N))
2233
8
    return Combined;
2234
3.78k
2235
3.78k
  return SDValue();
2236
3.78k
}
2237
2238
7.65k
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2239
7.65k
  auto VT = N0.getValueType();
2240
7.65k
2241
7.65k
  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2242
7.65k
  // If Y + 1 cannot overflow.
2243
7.65k
  if (
N1.getOpcode() == ISD::ADDCARRY && 7.65k
isNullConstant(N1.getOperand(1))247
) {
2244
194
    SDValue Y = N1.getOperand(0);
2245
194
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2246
194
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2247
77
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2248
77
                         N1.getOperand(2));
2249
7.57k
  }
2250
7.57k
2251
7.57k
  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2252
7.57k
  
if (7.57k
TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)7.57k
)
2253
6.64k
    
if (SDValue 6.64k
Carry6.64k
= getAsCarry(TLI, N1))
2254
0
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2255
0
                         DAG.getConstant(0, SDLoc(N), VT), Carry);
2256
7.57k
2257
7.57k
  return SDValue();
2258
7.57k
}
2259
2260
333
SDValue DAGCombiner::visitADDE(SDNode *N) {
2261
333
  SDValue N0 = N->getOperand(0);
2262
333
  SDValue N1 = N->getOperand(1);
2263
333
  SDValue CarryIn = N->getOperand(2);
2264
333
2265
333
  // canonicalize constant to RHS
2266
333
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2267
333
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2268
333
  if (
N0C && 333
!N1C6
)
2269
4
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2270
4
                       N1, N0, CarryIn);
2271
329
2272
329
  // fold (adde x, y, false) -> (addc x, y)
2273
329
  
if (329
CarryIn.getOpcode() == ISD::CARRY_FALSE329
)
2274
2
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2275
327
2276
327
  return SDValue();
2277
327
}
2278
2279
15.9k
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2280
15.9k
  SDValue N0 = N->getOperand(0);
2281
15.9k
  SDValue N1 = N->getOperand(1);
2282
15.9k
  SDValue CarryIn = N->getOperand(2);
2283
15.9k
  SDLoc DL(N);
2284
15.9k
2285
15.9k
  // canonicalize constant to RHS
2286
15.9k
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2287
15.9k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2288
15.9k
  if (
N0C && 15.9k
!N1C604
)
2289
224
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2290
15.6k
2291
15.6k
  // fold (addcarry x, y, false) -> (uaddo x, y)
2292
15.6k
  
if (15.6k
isNullConstant(CarryIn)15.6k
)
2293
469
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2294
15.2k
2295
15.2k
  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2296
15.2k
  
if (15.2k
isNullConstant(N0) && 15.2k
isNullConstant(N1)293
) {
2297
288
    EVT VT = N0.getValueType();
2298
288
    EVT CarryVT = CarryIn.getValueType();
2299
288
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2300
288
    AddToWorklist(CarryExt.getNode());
2301
288
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2302
288
                                    DAG.getConstant(1, DL, VT)),
2303
288
                     DAG.getConstant(0, DL, CarryVT));
2304
288
  }
2305
14.9k
2306
14.9k
  
if (SDValue 14.9k
Combined14.9k
= visitADDCARRYLike(N0, N1, CarryIn, N))
2307
51
    return Combined;
2308
14.8k
2309
14.8k
  
if (SDValue 14.8k
Combined14.8k
= visitADDCARRYLike(N1, N0, CarryIn, N))
2310
0
    return Combined;
2311
14.8k
2312
14.8k
  return SDValue();
2313
14.8k
}
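ADDCARRY takes two operands plus a carry-in and produces a sum plus a carry-out, which is why "(addcarry x, y, false)" collapses to UADDO above and why chains of ADDCARRY nodes implement wide additions. A standalone sketch of those semantics on a 32-bit limb (uint64_t is only used here to detect the carry; this models the operation, not the DAG node itself):

#include <cassert>
#include <cstdint>
#include <utility>

// Sketch of an addcarry step on one 32-bit limb: (sum, carry-out).
static std::pair<uint32_t, uint32_t> addCarry32(uint32_t X, uint32_t Y,
                                                uint32_t CarryIn) {
  uint64_t Wide = static_cast<uint64_t>(X) + Y + CarryIn;
  return {static_cast<uint32_t>(Wide),        // low 32 bits: the sum
          static_cast<uint32_t>(Wide >> 32)}; // bit 32: the carry-out
}

int main() {
  auto [Sum, Carry] = addCarry32(0xFFFFFFFFu, 1u, 0u);
  assert(Sum == 0 && Carry == 1);
  // With a zero carry-in this is exactly (uaddo x, y).
  auto [S2, C2] = addCarry32(7u, 8u, 0u);
  assert(S2 == 15 && C2 == 0);
  return 0;
}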
2314
2315
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2316
29.8k
                                       SDNode *N) {
2317
29.8k
  // Iff the flag result is dead:
2318
29.8k
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2319
29.8k
  if ((N0.getOpcode() == ISD::ADD ||
2320
29.5k
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2321
29.8k
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2322
49
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2323
49
                       N0.getOperand(0), N0.getOperand(1), CarryIn);
2324
29.7k
2325
29.7k
  /**
2326
29.7k
   * When one of the addcarry arguments is itself a carry, we may be facing
2327
29.7k
   * a diamond carry propagation, in which case we try to transform the DAG
2328
29.7k
   * to ensure linear carry propagation if that is possible.
2329
29.7k
   *
2330
29.7k
   * We are trying to get:
2331
29.7k
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2332
29.7k
   */
2333
29.7k
  
if (auto 29.7k
Y29.7k
= getAsCarry(TLI, N1)) {
2334
474
    /**
2335
474
     *            (uaddo A, B)
2336
474
     *             /       \
2337
474
     *          Carry      Sum
2338
474
     *            |          \
2339
474
     *            | (addcarry *, 0, Z)
2340
474
     *            |       /
2341
474
     *             \   Carry
2342
474
     *              |   /
2343
474
     * (addcarry X, *, *)
2344
474
     */
2345
474
    if (Y.getOpcode() == ISD::UADDO &&
2346
207
        CarryIn.getResNo() == 1 &&
2347
207
        CarryIn.getOpcode() == ISD::ADDCARRY &&
2348
4
        isNullConstant(CarryIn.getOperand(1)) &&
2349
474
        CarryIn.getOperand(0) == Y.getValue(0)) {
2350
2
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2351
2
                              Y.getOperand(0), Y.getOperand(1),
2352
2
                              CarryIn.getOperand(2));
2353
2
      AddToWorklist(NewY.getNode());
2354
2
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2355
2
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2356
2
                         NewY.getValue(1));
2357
2
    }
2358
29.7k
  }
2359
29.7k
2360
29.7k
  return SDValue();
2361
29.7k
}
2362
2363
// Since it may not be valid to emit a fold to zero for vector initializers,
2364
// check if we can before folding.
2365
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2366
                             SelectionDAG &DAG, bool LegalOperations,
2367
23
                             bool LegalTypes) {
2368
23
  if (!VT.isVector())
2369
13
    return DAG.getConstant(0, DL, VT);
2370
10
  
if (10
!LegalOperations || 10
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)0
)
2371
10
    return DAG.getConstant(0, DL, VT);
2372
0
  return SDValue();
2373
0
}
2374
2375
419k
SDValue DAGCombiner::visitSUB(SDNode *N) {
2376
419k
  SDValue N0 = N->getOperand(0);
2377
419k
  SDValue N1 = N->getOperand(1);
2378
419k
  EVT VT = N0.getValueType();
2379
419k
  SDLoc DL(N);
2380
419k
2381
419k
  // fold vector ops
2382
419k
  if (
VT.isVector()419k
) {
2383
15.3k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
2384
3
      return FoldedVOp;
2385
15.3k
2386
15.3k
    // fold (sub x, 0) -> x, vector edition
2387
15.3k
    
if (15.3k
ISD::isBuildVectorAllZeros(N1.getNode())15.3k
)
2388
8
      return N0;
2389
419k
  }
2390
419k
2391
419k
  // fold (sub x, x) -> 0
2392
419k
  // FIXME: Refactor this and xor and other similar operations together.
2393
419k
  
if (419k
N0 == N1419k
)
2394
10
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2395
419k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2396
419k
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2397
2
    // fold (sub c1, c2) -> c1-c2
2398
2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2399
2
                                      N1.getNode());
2400
2
  }
2401
419k
2402
419k
  
if (SDValue 419k
NewSel419k
= foldBinOpIntoSelect(N))
2403
2
    return NewSel;
2404
419k
2405
419k
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2406
419k
2407
419k
  // fold (sub x, c) -> (add x, -c)
2408
419k
  if (
N1C419k
) {
2409
4.42k
    return DAG.getNode(ISD::ADD, DL, VT, N0,
2410
4.42k
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2411
4.42k
  }
2412
415k
2413
415k
  
if (415k
isNullConstantOrNullSplatConstant(N0)415k
) {
2414
41.3k
    unsigned BitWidth = VT.getScalarSizeInBits();
2415
41.3k
    // Right-shifting everything out but the sign bit followed by negation is
2416
41.3k
    // the same as flipping arithmetic/logical shift type without the negation:
2417
41.3k
    // -(X >>u 31) -> (X >>s 31)
2418
41.3k
    // -(X >>s 31) -> (X >>u 31)
2419
41.3k
    if (
N1->getOpcode() == ISD::SRA || 41.3k
N1->getOpcode() == ISD::SRL35.3k
) {
2420
6.00k
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2421
6.00k
      if (
ShiftAmt && 6.00k
ShiftAmt->getZExtValue() == BitWidth - 15.85k
) {
2422
10
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2423
10
        if (
!LegalOperations || 10
TLI.isOperationLegal(NewSh, VT)4
)
2424
10
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2425
41.2k
      }
2426
6.00k
    }
2427
41.2k
2428
41.2k
    // 0 - X --> 0 if the sub is NUW.
2429
41.2k
    
if (41.2k
N->getFlags().hasNoUnsignedWrap()41.2k
)
2430
2
      return N0;
2431
41.2k
2432
41.2k
    
if (41.2k
DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))41.2k
) {
2433
6
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2434
6
      // N1 must be 0 because negating the minimum signed value is undefined.
2435
6
      if (N->getFlags().hasNoSignedWrap())
2436
2
        return N0;
2437
4
2438
4
      // 0 - X --> X if X is 0 or the minimum signed value.
2439
4
      return N1;
2440
4
    }
2441
41.3k
  }
2442
415k
2443
415k
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2444
415k
  
if (415k
isAllOnesConstantOrAllOnesSplatConstant(N0)415k
)
2445
392
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2446
414k
2447
414k
  // fold A-(A-B) -> B
2448
414k
  
if (414k
N1.getOpcode() == ISD::SUB && 414k
N0 == N1.getOperand(0)4.07k
)
2449
597
    return N1.getOperand(1);
2450
414k
2451
414k
  // fold (A+B)-A -> B
2452
414k
  
if (414k
N0.getOpcode() == ISD::ADD && 414k
N0.getOperand(0) == N115.5k
)
2453
17
    return N0.getOperand(1);
2454
414k
2455
414k
  // fold (A+B)-B -> A
2456
414k
  
if (414k
N0.getOpcode() == ISD::ADD && 414k
N0.getOperand(1) == N115.5k
)
2457
5
    return N0.getOperand(0);
2458
414k
2459
414k
  // fold C2-(A+C1) -> (C2-C1)-A
2460
414k
  
if (414k
N1.getOpcode() == ISD::ADD414k
) {
2461
5.04k
    SDValue N11 = N1.getOperand(1);
2462
5.04k
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2463
5.04k
        
isConstantOrConstantVector(N11, /* NoOpaques */ true)736
) {
2464
307
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2465
307
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2466
307
    }
2467
414k
  }
2468
414k
2469
414k
  // fold ((A+(B+or-C))-B) -> A+or-C
2470
414k
  
if (414k
N0.getOpcode() == ISD::ADD &&
2471
15.5k
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
2472
15.5k
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
2473
896
      N0.getOperand(1).getOperand(0) == N1)
2474
8
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2475
8
                       N0.getOperand(1).getOperand(1));
2476
414k
2477
414k
  // fold ((A+(C+B))-B) -> A+C
2478
414k
  
if (414k
N0.getOpcode() == ISD::ADD && 414k
N0.getOperand(1).getOpcode() == ISD::ADD15.4k
&&
2479
545
      N0.getOperand(1).getOperand(1) == N1)
2480
1
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2481
1
                       N0.getOperand(1).getOperand(0));
2482
414k
2483
414k
  // fold ((A-(B-C))-C) -> A-B
2484
414k
  
if (414k
N0.getOpcode() == ISD::SUB && 414k
N0.getOperand(1).getOpcode() == ISD::SUB5.59k
&&
2485
251
      N0.getOperand(1).getOperand(1) == N1)
2486
3
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2487
3
                       N0.getOperand(1).getOperand(0));
2488
414k
2489
414k
  // If either operand of a sub is undef, the result is undef
2490
414k
  
if (414k
N0.isUndef()414k
)
2491
0
    return N0;
2492
414k
  
if (414k
N1.isUndef()414k
)
2493
0
    return N1;
2494
414k
2495
414k
  // If the relocation model supports it, consider symbol offsets.
2496
414k
  
if (GlobalAddressSDNode *414k
GA414k
= dyn_cast<GlobalAddressSDNode>(N0))
2497
24
    
if (24
!LegalOperations && 24
TLI.isOffsetFoldingLegal(GA)24
) {
2498
0
      // fold (sub Sym, c) -> Sym-c
2499
0
      if (
N1C && 0
GA->getOpcode() == ISD::GlobalAddress0
)
2500
0
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2501
0
                                    GA->getOffset() -
2502
0
                                        (uint64_t)N1C->getSExtValue());
2503
0
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
2504
0
      
if (GlobalAddressSDNode *0
GB0
= dyn_cast<GlobalAddressSDNode>(N1))
2505
0
        
if (0
GA->getGlobal() == GB->getGlobal()0
)
2506
0
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2507
0
                                 DL, VT);
2508
414k
    }
2509
414k
2510
414k
  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2511
414k
  
if (414k
N1.getOpcode() == ISD::SIGN_EXTEND_INREG414k
) {
2512
2.83k
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2513
2.83k
    if (
TN->getVT() == MVT::i12.83k
) {
2514
54
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2515
54
                                 DAG.getConstant(1, DL, VT));
2516
54
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2517
54
    }
2518
413k
  }
2519
413k
2520
413k
  return SDValue();
2521
413k
}
2522
2523
1.18k
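The sub folds above lean on two's-complement identities, e.g. "(sub x, c) -> (add x, -c)" and the shift-type flip for "-(X >>u 31)". A minimal standalone C++ sketch (not DAGCombiner code; it assumes the usual arithmetic behaviour of >> on negative ints that targets provide) checking those identities:

#include <cassert>
#include <cstdint>

int main() {
  // fold (sub x, c) -> (add x, -c): subtracting a constant is adding its negation.
  int32_t x = 12345, c = 678;
  assert(x - c == x + (-c));

  // -(X >>u 31) -> (X >>s 31): negating the unsigned sign-bit extract gives
  // the arithmetic sign splat (0 or -1).
  uint32_t ux = 0x80000001u;
  int32_t arith = (int32_t)ux >> 31;          // -1 when the sign bit is set
  int32_t neg_logical = -(int32_t)(ux >> 31); // also -1
  assert(arith == neg_logical);
  return 0;
}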
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitUSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (usubo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (usubo x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

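The USUBO folds above assert that certain subtractions can never borrow. A short sketch using the GCC/Clang overflow builtins (compiler-specific, shown only to illustrate the borrow-flag reasoning):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t diff;
  // (usubo x, x) -> 0 with no borrow: x - x can never wrap.
  assert(!__builtin_sub_overflow(42u, 42u, &diff) && diff == 0u);
  // (usubo -1, x) -> ~x with no borrow: nothing is larger than all-ones.
  uint32_t x = 0x12345678u;
  assert(!__builtin_sub_overflow(~0u, x, &diff) && diff == ~x);
  return 0;
}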
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn))
    return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}

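The power-of-two mul folds above reduce multiplies to shifts. A tiny C++ check of the identities (illustrative only, not DAG code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 37;
  // fold (mul x, (1 << c)) -> x << c
  assert(x * 16 == x << 4);
  // fold (mul x, -(1 << c)) -> -(x << c)
  assert(x * -8 == -(x << 3));
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  assert((x << 2) * 5 == x * (5 << 2));
  return 0;
}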
/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8   : RTLIB::UDIVREM_I8;   break;
  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16  : RTLIB::UDIVREM_I16;  break;
  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32  : RTLIB::UDIVREM_I32;  break;
  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64  : RTLIB::UDIVREM_I64;  break;
  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}

/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    SDNode *User = *UI++;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}

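The combine above merges a div and a rem of the same operands into one divrem node. The same idea at source level, sketched with std::div (illustrative only):

#include <cassert>
#include <cstdlib>

int main() {
  int a = 1234, b = 17;
  // Two separate operations on the same operands...
  int q = a / b, r = a % b;
  // ...can be served by a single divrem-style computation.
  std::div_t d = std::div(a, b);
  assert(d.quot == q && d.rem == r);
  return 0;
}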
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  return SDValue();
}

SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
                                    (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}

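The sdiv-by-power-of-two expansion above builds a sign splat, a bias, an add and an arithmetic shift. A standalone sketch of that sequence for lg2 == 3, i.e. division by 8 (helper name is made up; >> on a negative int is assumed arithmetic, as on typical targets):

#include <cassert>
#include <cstdint>

static int32_t sdiv_pow2_by8(int32_t n) {
  int32_t sgn = n >> 31;                               // splat the sign bit
  int32_t bias = (int32_t)((uint32_t)sgn >> (32 - 3)); // 7 if n < 0, else 0
  return (n + bias) >> 3;                              // rounds toward zero
}

int main() {
  for (int32_t n : {100, -100, 7, -7, 0, -1})
    assert(sdiv_pow2_by8(n) == n / 8);
  return 0;
}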
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}

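The remainder lowering above relies on X % C == X - (X / C) * C, and on masking for power-of-two divisors. A quick C++ check of both identities (illustrative sketch):

#include <cassert>

int main() {
  int x = 12345, c = 7;
  // X % C lowered as X - (X / C) * C once the division itself is cheap.
  assert(x % c == x - (x / c) * c);
  // fold (urem x, pow2) -> (and x, pow2-1)
  unsigned u = 0xDEADBEEFu;
  assert(u % 16u == (u & 15u));
  return 0;
}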
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1)) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

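The MULHS widening above is the familiar "sign-extend, multiply wide, take the top half" trick. A minimal sketch for i32 (helper name is made up):

#include <cassert>
#include <cstdint>

static int32_t mulhs32(int32_t a, int32_t b) {
  // Sign-extend to 64 bits, multiply, keep the high 32 bits.
  return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}

int main() {
  assert(mulhs32(1 << 30, 8) == 2);      // high half of 2^33
  assert(mulhs32(-(1 << 30), 8) == -2);  // sign-extended operands keep the sign
  return 0;
}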
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

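The unsigned counterpart uses zero-extension instead of sign-extension. A matching sketch for i32 (illustrative helper):

#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t a, uint32_t b) {
  // Zero-extend to 64 bits, multiply, keep the high 32 bits.
  return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32);
}

int main() {
  assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(mulhu32(0x80000000u, 2u) == 1u);
  return 0;
}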
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type is twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type is twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

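Both overflow-multiply folds above replace a multiply-by-2 with an add of the value to itself, since the overflow behaviour is identical. A sketch with the GCC/Clang overflow builtins (compiler-specific, for illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // (umulo x, 2) -> (uaddo x, x): same result, same overflow flag.
  uint32_t m, a;
  uint32_t x = 0x90000000u;
  bool mul_ovf = __builtin_mul_overflow(x, 2u, &m);
  bool add_ovf = __builtin_add_overflow(x, x, &a);
  assert(mul_ovf == add_ovf && m == a);
  return 0;
}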
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}

/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}

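The same-opcode-hands folds above move a logic op inside a shared extension or shift. Two of the underlying identities, checked in a small standalone sketch (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xF0F0F0F0u, y = 0x3C3C3C3Cu;
  unsigned c = 4;
  // fold (and (srl x, z), (srl y, z)) -> (srl (and x, y), z)
  assert(((x >> c) & (y >> c)) == ((x & y) >> c));
  // fold (OP (zext a), (zext b)) -> (zext (OP a, b)), here OR on i8 -> i32
  uint8_t a = 0xA5, b = 0x0F;
  assert(((uint32_t)a | (uint32_t)b) == (uint32_t)(uint8_t)(a | b));
  return 0;
}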
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}

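The setcc-logic folds above turn two compares plus a logic op into one compare. The corresponding source-level identities, verified in a short sketch (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 0, y = 5;
  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
  assert(((x == 0) && (y == 0)) == ((x | y) == 0));
  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
  assert(((x < 0) || (y < 0)) == ((x | y) < 0));
  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
  int32_t a = 3, b = 3, c = 9, d = 9;
  assert(((a == b) && (c == d)) == (((a ^ b) | (c ^ d)) == 0));
  return 0;
}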
/// This contains all DAGCombine rules which reduce two values combined by
3599
/// an And operation to a single value. This makes them reusable in the context
3600
/// of visitSELECT(). Rules involving constants are not included as
3601
/// visitSELECT() already handles those cases.
3602
903k
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3603
903k
  EVT VT = N1.getValueType();
3604
903k
  SDLoc DL(N);
3605
903k
3606
903k
  // fold (and x, undef) -> 0
3607
903k
  if (N0.isUndef() || N1.isUndef())
3608
1
    return DAG.getConstant(0, DL, VT);
3609
903k
3610
903k
  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3611
10.4k
    return V;
3612
892k
3613
892k
  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3614
892k
      VT.getSizeInBits() <= 64) {
3615
25
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3616
25
      APInt ADDC = ADDI->getAPIntValue();
3617
25
      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3618
6
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3619
6
        // immediate for an add, but it is legal if its top c2 bits are set,
3620
6
        // transform the ADD so the immediate doesn't need to be materialized
3621
6
        // in a register.
3622
6
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3623
6
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3624
6
                                             SRLI->getZExtValue());
3625
6
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3626
6
            ADDC |= Mask;
3627
6
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3628
6
              SDLoc DL0(N0);
3629
6
              SDValue NewAdd =
3630
6
                DAG.getNode(ISD::ADD, DL0, VT,
3631
6
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3632
6
              CombineTo(N0.getNode(), NewAdd);
3633
6
              // Return N so it doesn't get rechecked!
3634
6
              return SDValue(N, 0);
3635
6
            }
3636
892k
          }
3637
6
        }
3638
6
      }
3639
25
    }
3640
25
  }
3641
892k
3642
892k
  // Reduce bit extract of low half of an integer to the narrower type.
3643
892k
  // (and (srl i64:x, K), KMask) ->
3644
892k
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3645
892k
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3646
38.0k
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3647
36.8k
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3648
33.3k
        unsigned Size = VT.getSizeInBits();
3649
33.3k
        const APInt &AndMask = CAnd->getAPIntValue();
3650
33.3k
        unsigned ShiftBits = CShift->getZExtValue();
3651
33.3k
3652
33.3k
        // Bail out, this node will probably disappear anyway.
3653
33.3k
        if (ShiftBits == 0)
3654
2
          return SDValue();
3655
33.3k
3656
33.3k
        unsigned MaskBits = AndMask.countTrailingOnes();
3657
33.3k
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3658
33.3k
3659
33.3k
        if (AndMask.isMask() &&
3660
33.3k
            // Required bits must not span the two halves of the integer and
3661
33.3k
            // must fit in the half size type.
3662
29.9k
            (ShiftBits + MaskBits <= Size / 2) &&
3663
21.4k
            TLI.isNarrowingProfitable(VT, HalfVT) &&
3664
264
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3665
232
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3666
232
            TLI.isTruncateFree(VT, HalfVT) &&
3667
33.3k
            TLI.isZExtFree(HalfVT, VT)) {
3668
195
          // The isNarrowingProfitable is to avoid regressions on PPC and
3669
195
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
3670
195
          // on downstream users of this. Those patterns could probably be
3671
195
          // extended to handle extensions mixed in.
3672
195
3673
195
          SDValue SL(N0);
3674
195
          assert(MaskBits <= Size);
3675
195
3676
195
          // Extracting the highest bit of the low half.
3677
195
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3678
195
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3679
195
                                      N0.getOperand(0));
3680
195
3681
195
          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3682
195
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3683
195
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3684
195
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3685
195
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3686
195
        }
3687
892k
      }
3688
36.8k
    }
3689
38.0k
  }
3690
892k
3691
892k
  return SDValue();
3692
892k
}
3693
3694
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3695
                                   EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3696
42.8k
                                   bool &NarrowLoad) {
3697
42.8k
  if (!AndC->getAPIntValue().isMask())
3698
24.7k
    return false;
3699
18.1k
3700
18.1k
  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3701
18.1k
3702
18.1k
  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3703
18.1k
  LoadedVT = LoadN->getMemoryVT();
3704
18.1k
3705
18.1k
  if (ExtVT == LoadedVT &&
3706
988
      (!LegalOperations ||
3707
18.1k
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3708
162
    // ZEXTLOAD will match without needing to change the size of the value being
3709
162
    // loaded.
3710
162
    NarrowLoad = false;
3711
162
    return true;
3712
162
  }
3713
18.0k
3714
18.0k
  // Do not change the width of a volatile load.
3715
18.0k
  if (LoadN->isVolatile())
3716
136
    return false;
3717
17.8k
3718
17.8k
  // Do not generate loads of non-round integer types since these can
3719
17.8k
  // be expensive (and would be wrong if the type is not byte sized).
3720
17.8k
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3721
11.0k
    return false;
3722
6.79k
3723
6.79k
  if (LegalOperations &&
3724
6.51k
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3725
3.62k
    return false;
3726
3.16k
3727
3.16k
  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3728
229
    return false;
3729
2.93k
3730
2.93k
  NarrowLoad = true;
3731
2.93k
  return true;
3732
2.93k
}
3733
3734
1.11M
SDValue DAGCombiner::visitAND(SDNode *N) {
3735
1.11M
  SDValue N0 = N->getOperand(0);
3736
1.11M
  SDValue N1 = N->getOperand(1);
3737
1.11M
  EVT VT = N1.getValueType();
3738
1.11M
3739
1.11M
  // x & x --> x
3740
1.11M
  if (N0 == N1)
3741
12
    return N0;
3742
1.11M
3743
1.11M
  // fold vector ops
3744
1.11M
  if (VT.isVector()) {
3745
74.9k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
3746
973
      return FoldedVOp;
3747
74.0k
3748
74.0k
    // fold (and x, 0) -> 0, vector edition
3749
74.0k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
3750
74.0k
      // do not return N0, because undef node may exist in N0
3751
54
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3752
54
                             SDLoc(N), N0.getValueType());
3753
73.9k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
3754
73.9k
      // do not return N1, because undef node may exist in N1
3755
1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3756
1
                             SDLoc(N), N1.getValueType());
3757
73.9k
3758
73.9k
    // fold (and x, -1) -> x, vector edition
3759
73.9k
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
3760
30
      return N1;
3761
73.9k
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
3762
1
      return N0;
3763
1.11M
  }
3764
1.11M
3765
1.11M
  // fold (and c1, c2) -> c1&c2
3766
1.11M
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3767
1.11M
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3768
1.11M
  if (N0C && N1C && !N1C->isOpaque())
3769
18
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3770
1.11M
  // canonicalize constant to RHS
3771
1.11M
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3772
1.34k
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3773
1.34k
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3774
1.11M
  // fold (and x, -1) -> x
3775
1.11M
  if (isAllOnesConstant(N1))
3776
8
    return N0;
3777
1.11M
  // if (and x, c) is known to be zero, return 0
3778
1.11M
  unsigned BitWidth = VT.getScalarSizeInBits();
3779
1.11M
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3780
962k
                                   APInt::getAllOnesValue(BitWidth)))
3781
29
    return DAG.getConstant(0, SDLoc(N), VT);
3782
1.11M
3783
1.11M
  if (SDValue NewSel = foldBinOpIntoSelect(N))
3784
73
    return NewSel;
3785
1.11M
3786
1.11M
  // reassociate and
3787
1.11M
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3788
3.15k
    return RAND;
3789
1.10M
  // fold (and (or x, C), D) -> D if (C & D) == D
3790
1.10M
  if (N1C && N0.getOpcode() == ISD::OR)
3791
13.2k
    if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3792
5.86k
      if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
3793
2.25k
        return N1;
3794
1.10M
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3795
1.10M
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3796
20.9k
    SDValue N0Op0 = N0.getOperand(0);
3797
20.9k
    APInt Mask = ~N1C->getAPIntValue();
3798
20.9k
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3799
20.9k
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3800
9.61k
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3801
9.61k
                                 N0.getValueType(), N0Op0);
3802
9.61k
3803
9.61k
      // Replace uses of the AND with uses of the Zero extend node.
3804
9.61k
      CombineTo(N, Zext);
3805
9.61k
3806
9.61k
      // We actually want to replace all uses of the any_extend with the
3807
9.61k
      // zero_extend, to avoid duplicating things.  This will later cause this
3808
9.61k
      // AND to be folded.
3809
9.61k
      CombineTo(N0.getNode(), Zext);
3810
9.61k
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3811
9.61k
    }
3812
1.09M
  }
3813
1.09M
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3814
1.09M
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3815
1.09M
  // already be zero by virtue of the width of the base type of the load.
3816
1.09M
  //
3817
1.09M
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
3818
1.09M
  // more cases.
3819
1.09M
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3820
9.30k
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3821
7.60k
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
3822
2.88k
       N0.getOperand(0).getResNo() == 0) ||
3823
1.09M
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3824
281k
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3825
281k
                                         N0 : N0.getOperand(0));
3826
281k
3827
281k
    // Get the constant (if applicable) the zero'th operand is being ANDed with.
3828
281k
    // This can be a pure constant or a vector splat, in which case we treat the
3829
281k
    // vector as a scalar and use the splat value.
3830
281k
    APInt Constant = APInt::getNullValue(1);
3831
281k
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3832
264k
      Constant = C->getAPIntValue();
3833
281k
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3834
3.39k
      APInt SplatValue, SplatUndef;
3835
3.39k
      unsigned SplatBitSize;
3836
3.39k
      bool HasAnyUndefs;
3837
3.39k
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3838
3.39k
                                             SplatBitSize, HasAnyUndefs);
3839
3.39k
      if (IsSplat) {
3840
3.39k
        // Undef bits can contribute to a possible optimisation if set, so
3841
3.39k
        // set them.
3842
3.39k
        SplatValue |= SplatUndef;
3843
3.39k
3844
3.39k
        // The splat value may be something like "0x00FFFFFF", which means 0 for
3845
3.39k
        // the first vector value and FF for the rest, repeating. We need a mask
3846
3.39k
        // that will apply equally to all members of the vector, so AND all the
3847
3.39k
        // lanes of the constant together.
3848
3.39k
        EVT VT = Vector->getValueType(0);
3849
3.39k
        unsigned BitWidth = VT.getScalarSizeInBits();
3850
3.39k
3851
3.39k
        // If the splat value has been compressed to a bitlength lower
3852
3.39k
        // than the size of the vector lane, we need to re-expand it to
3853
3.39k
        // the lane size.
3854
3.39k
        if (BitWidth > SplatBitSize)
3855
1
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3856
2
               SplatBitSize < BitWidth;
3857
1
               SplatBitSize = SplatBitSize * 2)
3858
1
            SplatValue |= SplatValue.shl(SplatBitSize);
3859
3.39k
3860
3.39k
        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3861
3.39k
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3862
3.39k
        if (SplatBitSize % BitWidth == 0) {
3863
3.39k
          Constant = APInt::getAllOnesValue(BitWidth);
3864
6.84k
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3865
3.45k
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3866
3.39k
        }
3867
3.39k
      }
3868
16.5k
    }
3869
281k
3870
281k
    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3871
281k
    // actually legal and isn't going to get expanded, else this is a false
3872
281k
    // optimisation.
3873
281k
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3874
281k
                                                    Load->getValueType(0),
3875
281k
                                                    Load->getMemoryVT());
3876
281k
3877
281k
    // Resize the constant to the same size as the original memory access before
3878
281k
    // extension. If it is still the AllOnesValue then this AND is completely
3879
281k
    // unneeded.
3880
281k
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3881
281k
3882
281k
    bool B;
3883
281k
    switch (Load->getExtensionType()) {
3884
2.23k
    default: B = false; break;
3885
191k
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3886
87.4k
    case ISD::ZEXTLOAD:
3887
87.4k
    case ISD::NON_EXTLOAD: B = true; break;
3888
281k
    }
3889
281k
3890
281k
    if (B && Constant.isAllOnesValue()) {
3891
190k
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3892
190k
      // preserve semantics once we get rid of the AND.
3893
190k
      SDValue NewLoad(Load, 0);
3894
190k
3895
190k
      // Fold the AND away. NewLoad may get replaced immediately.
3896
190k
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3897
190k
3898
190k
      if (Load->getExtensionType() == ISD::EXTLOAD) {
3899
181k
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3900
181k
                              Load->getValueType(0), SDLoc(Load),
3901
181k
                              Load->getChain(), Load->getBasePtr(),
3902
181k
                              Load->getOffset(), Load->getMemoryVT(),
3903
181k
                              Load->getMemOperand());
3904
181k
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3905
181k
        if (Load->getNumValues() == 3) {
3906
0
          // PRE/POST_INC loads have 3 values.
3907
0
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3908
0
                           NewLoad.getValue(2) };
3909
0
          CombineTo(Load, To, 3, true);
3910
181k
        } else {
3911
181k
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3912
181k
        }
3913
181k
      }
3914
190k
3915
190k
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
3916
190k
    }
3917
905k
  }
3918
905k
3919
905k
  // fold (and (load x), 255) -> (zextload x, i8)
3920
905k
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
3921
905k
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3922
905k
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3923
663k
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
3924
905k
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3925
72.9k
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3926
72.9k
    LoadSDNode *LN0 = HasAnyExt
3927
264
      ? cast<LoadSDNode>(N0.getOperand(0))
3928
72.6k
      : cast<LoadSDNode>(N0);
3929
72.9k
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3930
72.9k
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3931
42.6k
      auto NarrowLoad = false;
3932
42.6k
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3933
42.6k
      EVT ExtVT, LoadedVT;
3934
42.6k
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3935
42.6k
                           NarrowLoad)) {
3936
3.08k
        if (!NarrowLoad) {
3937
160
          SDValue NewLoad =
3938
160
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3939
160
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
3940
160
                           LN0->getMemOperand());
3941
160
          AddToWorklist(N);
3942
160
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3943
160
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3944
0
        } else {
3945
2.92k
          EVT PtrType = LN0->getOperand(1).getValueType();
3946
2.92k
3947
2.92k
          unsigned Alignment = LN0->getAlignment();
3948
2.92k
          SDValue NewPtr = LN0->getBasePtr();
3949
2.92k
3950
2.92k
          // For big endian targets, we need to add an offset to the pointer
3951
2.92k
          // to load the correct bytes.  For little endian systems, we merely
3952
2.92k
          // need to read fewer bytes from the same pointer.
3953
2.92k
          if (DAG.getDataLayout().isBigEndian()) {
3954
4
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3955
4
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
3956
4
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3957
4
            SDLoc DL(LN0);
3958
4
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3959
4
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3960
4
            Alignment = MinAlign(Alignment, PtrOff);
3961
4
          }
3962
2.92k
3963
2.92k
          AddToWorklist(NewPtr.getNode());
3964
2.92k
3965
2.92k
          SDValue Load = DAG.getExtLoad(
3966
2.92k
              ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3967
2.92k
              LN0->getPointerInfo(), ExtVT, Alignment,
3968
2.92k
              LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3969
2.92k
          AddToWorklist(N);
3970
2.92k
          CombineTo(LN0, Load, Load.getValue(1));
3971
2.92k
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3972
2.92k
        }
3973
902k
      }
3974
42.6k
    }
3975
72.9k
  }
3976
902k
3977
902k
  if (SDValue Combined = visitANDLike(N0, N1, N))
3978
10.6k
    return Combined;
3979
892k
3980
892k
  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3981
892k
  if (N0.getOpcode() == N1.getOpcode())
3982
63.1k
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3983
311
      return Tmp;
3984
891k
3985
891k
  // Masking the negated extension of a boolean is just the zero-extended
3986
891k
  // boolean:
3987
891k
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3988
891k
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3989
891k
  //
3990
891k
  // Note: the SimplifyDemandedBits fold below can make an information-losing
3991
891k
  // transform, and then we have no way to find this better fold.
3992
891k
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3993
50
    if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
3994
8
      SDValue SubRHS = N0.getOperand(1);
3995
8
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3996
3
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3997
3
        return SubRHS;
3998
5
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3999
3
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4000
3
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4001
891k
    }
4002
50
  }
4003
891k
4004
891k
  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4005
891k
  // fold (and (sra)) -> (and (srl)) when possible.
4006
891k
  if (SimplifyDemandedBits(SDValue(N, 0)))
4007
388k
    return SDValue(N, 0);
4008
503k
4009
503k
  // fold (zext_inreg (extload x)) -> (zextload x)
4010
503k
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4011
9.17k
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4012
9.17k
    EVT MemVT = LN0->getMemoryVT();
4013
9.17k
    // If we zero all the possible extended bits, then we can turn this into
4014
9.17k
    // a zextload if we are running before legalize or the operation is legal.
4015
9.17k
    unsigned BitWidth = N1.getScalarValueSizeInBits();
4016
9.17k
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4017
9.17k
                           BitWidth - MemVT.getScalarSizeInBits())) &&
4018
8.72k
        ((!LegalOperations && !LN0->isVolatile()) ||
4019
9.17k
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4020
6.86k
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4021
6.86k
                                       LN0->getChain(), LN0->getBasePtr(),
4022
6.86k
                                       MemVT, LN0->getMemOperand());
4023
6.86k
      AddToWorklist(N);
4024
6.86k
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4025
6.86k
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4026
6.86k
    }
4027
496k
  }
4028
496k
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4029
496k
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4030
496k
      N0.hasOneUse()) {
4031
366
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4032
366
    EVT MemVT = LN0->getMemoryVT();
4033
366
    // If we zero all the possible extended bits, then we can turn this into
4034
366
    // a zextload if we are running before legalize or the operation is legal.
4035
366
    unsigned BitWidth = N1.getScalarValueSizeInBits();
4036
366
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4037
366
                           BitWidth - MemVT.getScalarSizeInBits())) &&
4038
234
        ((!LegalOperations && !LN0->isVolatile()) ||
4039
366
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4040
218
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4041
218
                                       LN0->getChain(), LN0->getBasePtr(),
4042
218
                                       MemVT, LN0->getMemOperand());
4043
218
      AddToWorklist(N);
4044
218
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4045
218
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4046
218
    }
4047
495k
  }
4048
495k
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4049
495k
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4050
509
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4051
509
                                           N0.getOperand(1), false))
4052
3
      return BSwap;
4053
495k
  }
4054
495k
4055
495k
  return SDValue();
4056
495k
}
4057
4058
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4059
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4060
260k
                                        bool DemandHighBits) {
4061
260k
  if (!LegalOperations)
4062
140k
    return SDValue();
4063
120k
4064
120k
  EVT VT = N->getValueType(0);
4065
120k
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4066
17.6k
    return SDValue();
4067
102k
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4068
12.8k
    return SDValue();
4069
89.7k
4070
89.7k
  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4071
89.7k
  bool LookPassAnd0 = false;
4072
89.7k
  bool LookPassAnd1 = false;
4073
89.7k
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4074
1.11k
      std::swap(N0, N1);
4075
89.7k
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4076
500
      std::swap(N0, N1);
4077
89.7k
  if (N0.getOpcode() == ISD::AND) {
4078
7.16k
    if (!N0.getNode()->hasOneUse())
4079
993
      return SDValue();
4080
6.16k
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4081
6.16k
    if (!N01C || N01C->getZExtValue() != 0xFF00)
4082
6.08k
      return SDValue();
4083
85
    N0 = N0.getOperand(0);
4084
85
    LookPassAnd0 = true;
4085
85
  }
4086
89.7k
4087
82.6k
  if (N1.getOpcode() == ISD::AND) {
4088
3.80k
    if (!N1.getNode()->hasOneUse())
4089
131
      return SDValue();
4090
3.67k
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4091
3.67k
    if (!N11C || N11C->getZExtValue() != 0xFF)
4092
3.52k
      return SDValue();
4093
144
    N1 = N1.getOperand(0);
4094
144
    LookPassAnd1 = true;
4095
144
  }
4096
82.6k
4097
78.9k
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4098
1.60k
    std::swap(N0, N1);
4099
78.9k
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4100
76.3k
    return SDValue();
4101
2.66k
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4102
119
    return SDValue();
4103
2.54k
4104
2.54k
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4105
2.54k
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4106
2.54k
  if (!N01C || !N11C)
4107
1.07k
    return SDValue();
4108
1.46k
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4109
1.43k
    return SDValue();
4110
34
4111
34
  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4112
34
  SDValue N00 = N0->getOperand(0);
4113
34
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4114
6
    if (!N00.getNode()->hasOneUse())
4115
0
      return SDValue();
4116
6
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4117
6
    if (!N001C || N001C->getZExtValue() != 0xFF)
4118
2
      return SDValue();
4119
4
    N00 = N00.getOperand(0);
4120
4
    LookPassAnd0 = true;
4121
4
  }
4122
34
4123
32
  SDValue N10 = N1->getOperand(0);
4124
32
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4125
5
    if (!N10.getNode()->hasOneUse())
4126
0
      return SDValue();
4127
5
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4128
5
    if (!N101C || N101C->getZExtValue() != 0xFF00)
4129
0
      return SDValue();
4130
5
    N10 = N10.getOperand(0);
4131
5
    LookPassAnd1 = true;
4132
5
  }
4133
32
4134
32
  if (N00 != N10)
4135
0
    return SDValue();
4136
32
4137
32
  // Make sure everything beyond the low halfword gets set to zero since the SRL
4138
32
  // 16 will clear the top bits.
4139
32
  unsigned OpSizeInBits = VT.getSizeInBits();
4140
32
  if (DemandHighBits && OpSizeInBits > 16) {
4141
21
    // If the left-shift isn't masked out then the only way this is a bswap is
4142
21
    // if all bits beyond the low 8 are 0. In that case the entire pattern
4143
21
    // reduces to a left shift anyway: leave it for other parts of the combiner.
4144
21
    if (!LookPassAnd0)
4145
4
      return SDValue();
4146
17
4147
17
    // However, if the right shift isn't masked out then it might be because
4148
17
    // it's not needed. See if we can spot that too.
4149
17
    if (!LookPassAnd1 &&
4150
10
        !DAG.MaskedValueIsZero(
4151
10
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4152
0
      return SDValue();
4153
28
  }
4154
28
4155
28
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4156
28
  if (OpSizeInBits > 16) {
4157
28
    SDLoc DL(N);
4158
28
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4159
28
                      DAG.getConstant(OpSizeInBits - 16, DL,
4160
28
                                      getShiftAmountTy(VT)));
4161
28
  }
4162
260k
  return Res;
4163
260k
}
4164
4165
/// Return true if the specified node is an element that makes up a 32-bit
4166
/// packed halfword byteswap.
4167
/// ((x & 0x000000ff) << 8) |
4168
/// ((x & 0x0000ff00) >> 8) |
4169
/// ((x & 0x00ff0000) << 8) |
4170
/// ((x & 0xff000000) >> 8)
4171
5.52k
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4172
5.52k
  if (!N.getNode()->hasOneUse())
4173
575
    return false;
4174
4.95k
4175
4.95k
  unsigned Opc = N.getOpcode();
4176
4.95k
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4177
3.88k
    return false;
4178
1.07k
4179
1.07k
  SDValue N0 = N.getOperand(0);
4180
1.07k
  unsigned Opc0 = N0.getOpcode();
4181
1.07k
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4182
733
    return false;
4183
338
4184
338
  ConstantSDNode *N1C = nullptr;
4185
338
  // SHL or SRL: look upstream for AND mask operand
4186
338
  if (Opc == ISD::AND)
4187
319
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4188
19
  else if (Opc0 == ISD::AND)
4189
19
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4190
338
  if (!N1C)
4191
75
    return false;
4192
263
4193
263
  unsigned MaskByteOffset;
4194
263
  switch (N1C->getZExtValue()) {
4195
207
  default:
4196
207
    return false;
4197
10
  case 0xFF:       MaskByteOffset = 0; break;
4198
18
  case 0xFF00:     MaskByteOffset = 1; break;
4199
20
  case 0xFF0000:   MaskByteOffset = 2; break;
4200
8
  case 0xFF000000: MaskByteOffset = 3; break;
4201
56
  }
4202
56
4203
56
  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4204
56
  if (Opc == ISD::AND) {
4205
42
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4206
22
      // (x >> 8) & 0xff
4207
22
      // (x >> 8) & 0xff0000
4208
22
      if (Opc0 != ISD::SRL)
4209
10
        return false;
4210
12
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4211
12
      if (!C || C->getZExtValue() != 8)
4212
0
        return false;
4213
20
    } else {
4214
20
      // (x << 8) & 0xff00
4215
20
      // (x << 8) & 0xff000000
4216
20
      if (Opc0 != ISD::SHL)
4217
8
        return false;
4218
12
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4219
12
      if (!C || C->getZExtValue() != 8)
4220
0
        return false;
4221
56
    }
4222
14
  } else if (Opc == ISD::SHL) {
4223
8
    // (x & 0xff) << 8
4224
8
    // (x & 0xff0000) << 8
4225
8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
4226
0
      return false;
4227
8
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4228
8
    if (!C || C->getZExtValue() != 8)
4229
0
      return false;
4230
6
  } else { // Opc == ISD::SRL
4231
6
    // (x & 0xff00) >> 8
4232
6
    // (x & 0xff000000) >> 8
4233
6
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
4234
0
      return false;
4235
6
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4236
6
    if (!C || C->getZExtValue() != 8)
4237
0
      return false;
4238
38
  }
4239
38
4240
38
  if (Parts[MaskByteOffset])
4241
0
    return false;
4242
38
4243
38
  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4244
38
  return true;
4245
38
}
4246
4247
/// Match a 32-bit packed halfword bswap. That is
4248
/// ((x & 0x000000ff) << 8) |
4249
/// ((x & 0x0000ff00) >> 8) |
4250
/// ((x & 0x00ff0000) << 8) |
4251
/// ((x & 0xff000000) >> 8)
4252
/// => (rotl (bswap x), 16)
4253
254k
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4254
254k
  if (!LegalOperations)
4255
137k
    return SDValue();
4256
117k
4257
117k
  EVT VT = N->getValueType(0);
4258
117k
  if (VT != MVT::i32)
4259
49.6k
    return SDValue();
4260
67.5k
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4261
9.55k
    return SDValue();
4262
57.9k
4263
57.9k
  // Look for either
4264
57.9k
  // (or (or (and), (and)), (or (and), (and)))
4265
57.9k
  // (or (or (or (and), (and)), (and)), (and))
4266
57.9k
  if (N0.getOpcode() != ISD::OR)
4267
52.4k
    return SDValue();
4268
5.49k
  SDValue N00 = N0.getOperand(0);
4269
5.49k
  SDValue N01 = N0.getOperand(1);
4270
5.49k
  SDNode *Parts[4] = {};
4271
5.49k
4272
5.49k
  if (N1.getOpcode() == ISD::OR &&
4273
5.49k
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4274
86
    // (or (or (and), (and)), (or (and), (and)))
4275
86
    if (!isBSwapHWordElement(N00, Parts))
4276
80
      return SDValue();
4277
6
4278
6
    if (!isBSwapHWordElement(N01, Parts))
4279
0
      return SDValue();
4280
6
    SDValue N10 = N1.getOperand(0);
4281
6
    if (!isBSwapHWordElement(N10, Parts))
4282
0
      return SDValue();
4283
6
    SDValue N11 = N1.getOperand(1);
4284
6
    if (!isBSwapHWordElement(N11, Parts))
4285
2
      return SDValue();
4286
5.41k
  } else {
4287
5.41k
    // (or (or (or (and), (and)), (and)), (and))
4288
5.41k
    if (!isBSwapHWordElement(N1, Parts))
4289
5.40k
      return SDValue();
4290
4
    if (!isBSwapHWordElement(N01, Parts))
4291
0
      return SDValue();
4292
4
    if (N00.getOpcode() != ISD::OR)
4293
0
      return SDValue();
4294
4
    SDValue N000 = N00.getOperand(0);
4295
4
    if (!isBSwapHWordElement(N000, Parts))
4296
0
      return SDValue();
4297
4
    SDValue N001 = N00.getOperand(1);
4298
4
    if (!isBSwapHWordElement(N001, Parts))
4299
0
      return SDValue();
4300
8
  }
4301
8
4302
8
  // Make sure the parts are all coming from the same node.
4303
8
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4304
0
    return SDValue();
4305
8
4306
8
  SDLoc DL(N);
4307
8
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4308
8
                              SDValue(Parts[0], 0));
4309
8
4310
8
  // Result of the bswap should be rotated by 16. If it's not legal, then
4311
8
  // do  (x << 16) | (x >> 16).
4312
8
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4313
8
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4314
4
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4315
4
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4316
4
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4317
0
  return DAG.getNode(ISD::OR, DL, VT,
4318
0
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4319
0
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4320
0
}
4321
4322
/// This contains all DAGCombine rules which reduce two values combined by
4323
/// an Or operation to a single value \see visitANDLike().
4324
254k
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4325
254k
  EVT VT = N1.getValueType();
4326
254k
  SDLoc DL(N);
4327
254k
4328
254k
  // fold (or x, undef) -> -1
4329
254k
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4330
11
    return DAG.getAllOnesConstant(DL, VT);
4331
254k
4332
254k
  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4333
159
    return V;
4334
254k
4335
254k
  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4336
254k
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4337
254k
      // Don't increase # computations.
4338
254k
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4339
17.4k
    // We can only do this xform if we know that bits from X that are set in C2
4340
17.4k
    // but not in C1 are already zero.  Likewise for Y.
4341
17.4k
    if (const ConstantSDNode *N0O1C =
4342
8.63k
        getAsNonOpaqueConstant(N0.getOperand(1))) {
4343
8.63k
      if (const ConstantSDNode *N1O1C =
4344
8.49k
          getAsNonOpaqueConstant(N1.getOperand(1))) {
4345
8.49k
        // We can only do this xform if we know that bits from X that are set in
4346
8.49k
        // C2 but not in C1 are already zero.  Likewise for Y.
4347
8.49k
        const APInt &LHSMask = N0O1C->getAPIntValue();
4348
8.49k
        const APInt &RHSMask = N1O1C->getAPIntValue();
4349
8.49k
4350
8.49k
        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4351
8.49k
            
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)389
) {
4352
41
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4353
41
                                  N0.getOperand(0), N1.getOperand(0));
4354
41
          return DAG.getNode(ISD::AND, DL, VT, X,
4355
41
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
4356
41
        }
4357
254k
      }
4358
8.63k
    }
4359
17.4k
  }
4360
254k
4361
254k
  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4362
254k
  if (N0.getOpcode() == ISD::AND &&
4363
36.2k
      N1.getOpcode() == ISD::AND &&
4364
17.4k
      N0.getOperand(0) == N1.getOperand(0) &&
4365
254k
      // Don't increase # computations.
4366
254k
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4367
86
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4368
86
                            N0.getOperand(1), N1.getOperand(1));
4369
86
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4370
86
  }
4371
254k
4372
254k
  return SDValue();
4373
254k
}
4374
4375
255k
SDValue DAGCombiner::visitOR(SDNode *N) {
4376
255k
  SDValue N0 = N->getOperand(0);
4377
255k
  SDValue N1 = N->getOperand(1);
4378
255k
  EVT VT = N1.getValueType();
4379
255k
4380
255k
  // x | x --> x
4381
255k
  if (N0 == N1)
4382
9
    return N0;
4383
255k
4384
255k
  // fold vector ops
4385
255k
  if (VT.isVector()) {
4386
22.1k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
4387
7
      return FoldedVOp;
4388
22.1k
4389
22.1k
    // fold (or x, 0) -> x, vector edition
4390
22.1k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
4391
58
      return N1;
4392
22.1k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
4393
66
      return N0;
4394
22.0k
4395
22.0k
    // fold (or x, -1) -> -1, vector edition
4396
22.0k
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
4397
22.0k
      // do not return N0, because undef node may exist in N0
4398
1
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4399
22.0k
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
4400
22.0k
      // do not return N1, because undef node may exist in N1
4401
0
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4402
22.0k
4403
22.0k
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4404
22.0k
    // Do this only if the resulting shuffle is legal.
4405
22.0k
    if (isa<ShuffleVectorSDNode>(N0) &&
4406
203
        isa<ShuffleVectorSDNode>(N1) &&
4407
22.0k
        // Avoid folding a node with illegal type.
4408
22.0k
        TLI.isTypeLegal(VT)) {
4409
108
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4410
108
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4411
108
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4412
108
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4413
108
      // Ensure both shuffles have a zero input.
4414
108
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4415
61
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4416
61
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4417
61
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4418
61
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4419
61
        bool CanFold = true;
4420
61
        int NumElts = VT.getVectorNumElements();
4421
61
        SmallVector<int, 4> Mask(NumElts);
4422
61
4423
275
        for (int i = 0; i != NumElts; ++i) {
4424
219
          int M0 = SV0->getMaskElt(i);
4425
219
          int M1 = SV1->getMaskElt(i);
4426
219
4427
219
          // Determine if either index is pointing to a zero vector.
4428
218
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4429
218
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4430
219
4431
219
          // If one element is zero and the other side is undef, keep undef.
4432
219
          // This also handles the case that both are undef.
4433
219
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4434
1
            Mask[i] = -1;
4435
1
            continue;
4436
1
          }
4437
218
4438
218
          // Make sure only one of the elements is zero.
4439
218
          if (M0Zero == M1Zero) {
4440
5
            CanFold = false;
4441
5
            break;
4442
5
          }
4443
213
4444
218
          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4445
213
4446
213
          // We have a zero and non-zero element. If the non-zero came from
4447
213
          // SV0 make the index a LHS index. If it came from SV1, make it
4448
213
          // a RHS index. We need to mod by NumElts because we don't care
4449
213
          // which operand it came from in the original shuffles.
4450
213
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4451
219
        }
4452
61
4453
61
        if (CanFold) {
4454
56
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4455
56
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4456
56
4457
56
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4458
56
          if (!LegalMask) {
4459
0
            std::swap(NewLHS, NewRHS);
4460
0
            ShuffleVectorSDNode::commuteMask(Mask);
4461
0
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4462
0
          }
4463
56
4464
56
          if (LegalMask)
4465
56
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4466
255k
        }
4467
61
      }
4468
108
    }
4469
22.1k
  }
4470
255k
4471
255k
  // fold (or c1, c2) -> c1|c2
4472
255k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4473
255k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4474
255k
  if (N0C && N1C && !N1C->isOpaque())
4475
124
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4476
255k
  // canonicalize constant to RHS
4477
255k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4478
779
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4479
775
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4480
254k
  // fold (or x, 0) -> x
4481
254k
  if (isNullConstant(N1))
4482
205
    return N0;
4483
254k
  // fold (or x, -1) -> -1
4484
254k
  if (isAllOnesConstant(N1))
4485
80
    return N1;
4486
254k
4487
254k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
4488
41
    return NewSel;
4489
254k
4490
254k
  // fold (or x, c) -> c iff (x & ~c) == 0
4491
254k
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4492
2
    return N1;
4493
254k
4494
254k
  if (SDValue Combined = visitORLike(N0, N1, N))
4495
291
    return Combined;
4496
254k
4497
254k
  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4498
254k
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4499
8
    return BSwap;
4500
254k
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4501
17
    return BSwap;
4502
254k
4503
254k
  // reassociate or
4504
254k
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4505
60
    return ROR;
4506
254k
4507
254k
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4508
254k
  // iff (c1 & c2) != 0.
4509
254k
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
4510
4.09k
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4511
4.05k
      if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
4512
22
        if (SDValue COR =
4513
22
                DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
4514
22
          return DAG.getNode(
4515
22
              ISD::AND, SDLoc(N), VT,
4516
22
              DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4517
0
        return SDValue();
4518
0
      }
4519
4.05k
    }
4520
4.09k
  }
4521
254k
4522
254k
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4523
254k
  if (N0.getOpcode() == N1.getOpcode())
4524
75.6k
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4525
903
      return Tmp;
4526
253k
4527
253k
  // See if this is some rotate idiom.
4528
253k
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4529
1.96k
    return SDValue(Rot, 0);
4530
251k
4531
251k
  if (SDValue Load = MatchLoadCombine(N))
4532
756
    return Load;
4533
250k
4534
250k
  // Simplify the operands using demanded-bits information.
4535
250k
  if (SimplifyDemandedBits(SDValue(N, 0)))
4536
4.48k
    return SDValue(N, 0);
4537
246k
4538
246k
  return SDValue();
4539
246k
}
4540
4541
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
4542
258k
bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4543
258k
  if (Op.getOpcode() == ISD::AND) {
4544
26.8k
    if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4545
24.3k
      Mask = Op.getOperand(1);
4546
24.3k
      Op = Op.getOperand(0);
4547
26.8k
    } else {
4548
2.44k
      return false;
4549
2.44k
    }
4550
256k
  }
4551
256k
4552
256k
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4553
54.0k
    Shift = Op;
4554
54.0k
    return true;
4555
54.0k
  }
4556
201k
4557
201k
  return false;
4558
201k
}
4559
4560
// Return true if we can prove that, whenever Neg and Pos are both in the
4561
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4562
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4563
//
4564
//     (or (shift1 X, Neg), (shift2 X, Pos))
4565
//
4566
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4567
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4568
// to consider shift amounts with defined behavior.
4569
708
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4570
708
  // If EltSize is a power of 2 then:
4571
708
  //
4572
708
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4573
708
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4574
708
  //
4575
708
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4576
708
  // for the stronger condition:
4577
708
  //
4578
708
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4579
708
  //
4580
708
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4581
708
  // we can just replace Neg with Neg' for the rest of the function.
4582
708
  //
4583
708
  // In other cases we check for the even stronger condition:
4584
708
  //
4585
708
  //     Neg == EltSize - Pos                                    [B]
4586
708
  //
4587
708
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4588
708
  // behavior if Pos == 0 (and consequently Neg == EltSize).
4589
708
  //
4590
708
  // We could actually use [A] whenever EltSize is a power of 2, but the
4591
708
  // only extra cases that it would match are those uninteresting ones
4592
708
  // where Neg and Pos are never in range at the same time.  E.g. for
4593
708
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4594
708
  // as well as (sub 32, Pos), but:
4595
708
  //
4596
708
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4597
708
  //
4598
708
  // always invokes undefined behavior for 32-bit X.
4599
708
  //
4600
708
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4601
708
  unsigned MaskLoBits = 0;
4602
708
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4603
42
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4604
42
      if (NegC->getAPIntValue() == EltSize - 1) {
4605
40
        Neg = Neg.getOperand(0);
4606
40
        MaskLoBits = Log2_64(EltSize);
4607
40
      }
4608
42
    }
4609
42
  }
4610
708
4611
708
  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4612
708
  if (Neg.getOpcode() != ISD::SUB)
4613
414
    return false;
4614
294
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4615
294
  if (!NegC)
4616
0
    return false;
4617
294
  SDValue NegOp1 = Neg.getOperand(1);
4618
294
4619
294
  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4620
294
  // Pos'.  The truncation is redundant for the purpose of the equality.
4621
294
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4622
16
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4623
16
      if (PosC->getAPIntValue() == EltSize - 1)
4624
16
        Pos = Pos.getOperand(0);
4625
294
4626
294
  // The condition we need is now:
4627
294
  //
4628
294
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4629
294
  //
4630
294
  // If NegOp1 == Pos then we need:
4631
294
  //
4632
294
  //              EltSize & Mask == NegC & Mask
4633
294
  //
4634
294
  // (because "x & Mask" is a truncation and distributes through subtraction).
4635
294
  APInt Width;
4636
294
  if (Pos == NegOp1)
4637
247
    Width = NegC->getAPIntValue();
4638
294
4639
294
  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4640
294
  // Then the condition we want to prove becomes:
4641
294
  //
4642
294
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4643
294
  //
4644
294
  // which, again because "x & Mask" is a truncation, becomes:
4645
294
  //
4646
294
  //                NegC & Mask == (EltSize - PosC) & Mask
4647
294
  //             EltSize & Mask == (NegC + PosC) & Mask
4648
47
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4649
11
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4650
11
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4651
11
    else
4652
0
      return false;
4653
47
  } else
4654
36
    return false;
4655
258
4656
258
  // Now we just need to check that EltSize & Mask == Width & Mask.
4657
258
  if (MaskLoBits)
4658
258
    // EltSize & Mask is 0 since Mask is EltSize - 1.
4659
22
    return Width.getLoBits(MaskLoBits) == 0;
4660
236
  return Width == EltSize;
4661
236
}
4662
4663
// A subroutine of MatchRotate used once we have found an OR of two opposite
4664
// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4665
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4666
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
4667
// Neg with outer conversions stripped away.
4668
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4669
                                       SDValue Neg, SDValue InnerPos,
4670
                                       SDValue InnerNeg, unsigned PosOpcode,
4671
708
                                       unsigned NegOpcode, const SDLoc &DL) {
4672
708
  // fold (or (shl x, (*ext y)),
4673
708
  //          (srl x, (*ext (sub 32, y)))) ->
4674
708
  //   (rotl x, y) or (rotr x, (sub 32, y))
4675
708
  //
4676
708
  // fold (or (shl x, (*ext (sub 32, y))),
4677
708
  //          (srl x, (*ext y))) ->
4678
708
  //   (rotr x, y) or (rotl x, (sub 32, y))
4679
708
  EVT VT = Shifted.getValueType();
4680
708
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4681
238
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4682
238
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4683
238
                       HasPos ? Pos : Neg).getNode();
4684
238
  }
4685
470
4686
470
  return nullptr;
4687
470
}
4688
4689
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4690
// idioms for rotate, and if the target supports rotation instructions, generate
4691
// a rot[lr].
4692
253k
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4693
253k
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4694
253k
  EVT VT = LHS.getValueType();
4695
253k
  if (!TLI.isTypeLegal(VT)) return nullptr;
4696
243k
4697
243k
  // The target must have at least one rotate flavor.
4698
243k
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4699
243k
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4700
243k
  if (!HasROTL && !HasROTR) return nullptr;
4701
216k
4702
216k
  // Match "(X shl/srl V1) & V2" where V2 may not be present.
4703
216k
  SDValue LHSShift;   // The shift.
4704
216k
  SDValue LHSMask;    // AND value if any.
4705
216k
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4706
174k
    return nullptr; // Not part of a rotate.
4707
41.8k
4708
41.8k
  SDValue RHSShift;   // The shift.
4709
41.8k
  SDValue RHSMask;    // AND value if any.
4710
41.8k
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4711
29.7k
    return nullptr; // Not part of a rotate.
4712
12.1k
4713
12.1k
  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4714
9.47k
    return nullptr;   // Not shifting the same value.
4715
2.66k
4716
2.66k
  if (LHSShift.getOpcode() == RHSShift.getOpcode())
4717
262
    return nullptr;   // Shifts must disagree.
4718
2.39k
4719
2.39k
  // Canonicalize shl to left side in a shl/srl pair.
4720
2.39k
  if (RHSShift.getOpcode() == ISD::SHL) {
4721
963
    std::swap(LHS, RHS);
4722
963
    std::swap(LHSShift, RHSShift);
4723
963
    std::swap(LHSMask, RHSMask);
4724
963
  }
4725
2.39k
4726
2.39k
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
4727
2.39k
  SDValue LHSShiftArg = LHSShift.getOperand(0);
4728
2.39k
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
4729
2.39k
  SDValue RHSShiftArg = RHSShift.getOperand(0);
4730
2.39k
  SDValue RHSShiftAmt = RHSShift.getOperand(1);
4731
2.39k
4732
2.39k
  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4733
2.39k
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4734
2.39k
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
4735
3.01k
                                        ConstantSDNode *RHS) {
4736
3.01k
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
4737
3.01k
  };
4738
2.39k
  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
4739
1.72k
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4740
1.72k
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4741
1.72k
4742
1.72k
    // If there is an AND of either shifted operand, apply it to the result.
4743
1.72k
    if (LHSMask.getNode() || RHSMask.getNode()) {
4744
47
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4745
47
      SDValue Mask = AllOnes;
4746
47
4747
47
      if (LHSMask.getNode()) {
4748
47
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
4749
47
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4750
47
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
4751
47
      }
4752
47
      if (RHSMask.getNode()) {
4753
34
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
4754
34
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4755
34
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
4756
34
      }
4757
47
4758
47
      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4759
47
    }
4760
1.72k
4761
1.72k
    return Rot.getNode();
4762
1.72k
  }
4763
670
4764
670
  // If there is a mask here, and we have a variable shift, we can't be sure
4765
670
  // that we're masking out the right stuff.
4766
670
  if (LHSMask.getNode() || RHSMask.getNode())
4767
253
    return nullptr;
4768
417
4769
417
  // If the shift amount is sign/zext/any-extended just peel it off.
4770
417
  SDValue LExtOp0 = LHSShiftAmt;
4771
417
  SDValue RExtOp0 = RHSShiftAmt;
4772
417
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4773
417
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4774
385
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4775
385
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4776
87
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4777
87
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4778
55
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4779
417
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4780
79
    LExtOp0 = LHSShiftAmt.getOperand(0);
4781
79
    RExtOp0 = RHSShiftAmt.getOperand(0);
4782
79
  }
4783
417
4784
417
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4785
417
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4786
417
  if (TryL)
4787
126
    return TryL;
4788
291
4789
291
  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4790
291
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4791
291
  if (TryR)
4792
112
    return TryR;
4793
179
4794
179
  return nullptr;
4795
179
}
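
Illustrative example (not part of the DAGCombiner.cpp listing): a minimal C++ sketch of the source-level idiom that MatchRotate targets, assuming a 32-bit unsigned type. The OR of a left shift and the complementary right shift of the same value is matched via MatchRotateHalf/MatchRotateSum above and rewritten to a single ISD::ROTL/ISD::ROTR node when the target has a legal rotate. The function names below are hypothetical.

#include <cstdint>

// Constant-amount rotate: the shift amounts 8 and 24 sum to the bit width,
// so the (shl | srl) pair can be matched as a single rotate.
uint32_t rotl8(uint32_t x) {
  return (x << 8) | (x >> 24);
}

// Variable-amount rotate written in the pos/neg form handled by
// MatchRotatePosNeg; the masking keeps the shift amounts in range.
uint32_t rotl_var(uint32_t x, unsigned n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}
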
4796
4797
namespace {
4798
4799
/// Represents known origin of an individual byte in load combine pattern. The
4800
/// value of the byte is either constant zero or comes from memory.
4801
struct ByteProvider {
4802
  // For constant zero providers Load is set to nullptr. For memory providers
4803
  // Load represents the node which loads the byte from memory.
4804
  // ByteOffset is the offset of the byte in the value produced by the load.
4805
  LoadSDNode *Load = nullptr;
4806
  unsigned ByteOffset = 0;
4807
4808
  ByteProvider() = default;
4809
4810
65.5k
  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4811
65.5k
    return ByteProvider(Load, ByteOffset);
4812
65.5k
  }
4813
4814
56.6k
  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4815
4816
74.7k
  bool isConstantZero() const { return !Load; }
4817
31.9k
  bool isMemory() const { return Load; }
4818
4819
0
  bool operator==(const ByteProvider &Other) const {
4820
0
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
4821
0
  }
4822
4823
private:
4824
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4825
122k
      : Load(Load), ByteOffset(ByteOffset) {}
4826
};
4827
4828
} // end anonymous namespace
4829
4830
/// Recursively traverses the expression calculating the origin of the requested
4831
/// byte of the given value. Returns None if the provider can't be calculated.
4832
///
4833
/// For all the values except the root of the expression, this verifies that the value
4834
/// has exactly one use; if that is not true, it returns None. This way, if the origin
4835
/// of the byte is returned it's guaranteed that the values which contribute to
4836
/// the byte are not used outside of this expression.
4837
///
4838
/// Because the parts of the expression are not allowed to have more than one
4839
/// use this function iterates over trees, not DAGs. So it never visits the same
4840
/// node more than once.
4841
static const Optional<ByteProvider>
4842
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
4843
648k
                      bool Root = false) {
4844
648k
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4845
648k
  if (Depth == 10)
4846
839
    return None;
4847
647k
4848
647k
  if (!Root && !Op.hasOneUse())
4849
72.6k
    return None;
4850
575k
4851
647k
  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4852
575k
  unsigned BitWidth = Op.getValueSizeInBits();
4853
575k
  if (BitWidth % 8 != 0)
4854
0
    return None;
4855
575k
  unsigned ByteWidth = BitWidth / 8;
4856
575k
  assert(Index < ByteWidth && "invalid index requested");
4857
575k
  (void) ByteWidth;
4858
575k
4859
575k
  switch (Op.getOpcode()) {
4860
275k
  case ISD::OR: {
4861
275k
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4862
275k
    if (!LHS)
4863
187k
      return None;
4864
88.6k
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4865
88.6k
    if (!RHS)
4866
38.7k
      return None;
4867
49.8k
4868
49.8k
    if (LHS->isConstantZero())
4869
25.0k
      return RHS;
4870
24.8k
    if (RHS->isConstantZero())
4871
23.3k
      return LHS;
4872
1.55k
    return None;
4873
1.55k
  }
4874
76.2k
  case ISD::SHL: {
4875
76.2k
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4876
76.2k
    if (!ShiftOp)
4877
4.02k
      return None;
4878
72.2k
4879
72.2k
    uint64_t BitShift = ShiftOp->getZExtValue();
4880
72.2k
    if (BitShift % 8 != 0)
4881
16.5k
      return None;
4882
55.6k
    uint64_t ByteShift = BitShift / 8;
4883
55.6k
4884
55.6k
    return Index < ByteShift
4885
30.6k
               ? ByteProvider::getConstantZero()
4886
24.9k
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4887
24.9k
                                       Depth + 1);
4888
55.6k
  }
4889
35.6k
  case ISD::ANY_EXTEND:
4890
35.6k
  case ISD::SIGN_EXTEND:
4891
35.6k
  case ISD::ZERO_EXTEND: {
4892
35.6k
    SDValue NarrowOp = Op->getOperand(0);
4893
35.6k
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4894
35.6k
    if (NarrowBitWidth % 8 != 0)
4895
240
      return None;
4896
35.4k
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4897
35.4k
4898
35.4k
    if (Index >= NarrowByteWidth)
4899
11.5k
      return Op.getOpcode() == ISD::ZERO_EXTEND
4900
11.5k
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4901
16
                 : None;
4902
23.9k
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
4903
23.9k
  }
4904
50
  case ISD::BSWAP:
4905
50
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4906
50
                                 Depth + 1);
4907
81.1k
  case ISD::LOAD: {
4908
81.1k
    auto L = cast<LoadSDNode>(Op.getNode());
4909
81.1k
    if (L->isVolatile() || L->isIndexed())
4910
328
      return None;
4911
80.8k
4912
80.8k
    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4913
80.8k
    if (NarrowBitWidth % 8 != 0)
4914
124
      return None;
4915
80.7k
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4916
80.7k
4917
80.7k
    if (Index >= NarrowByteWidth)
4918
15.2k
      return L->getExtensionType() == ISD::ZEXTLOAD
4919
14.4k
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4920
744
                 : None;
4921
65.5k
    return ByteProvider::getMemory(L, Index);
4922
65.5k
  }
4923
106k
  }
4924
106k
4925
106k
  return None;
4926
106k
}
4927
4928
/// Match a pattern where a wide type scalar value is loaded by several narrow
4929
/// loads and combined by shifts and ors. Fold it into a single load or a load
4930
/// and a BSWAP if the targets supports it.
4931
///
4932
/// Assuming little endian target:
4933
///  i8 *a = ...
4934
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4935
/// =>
4936
///  i32 val = *((i32)a)
4937
///
4938
///  i8 *a = ...
4939
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4940
/// =>
4941
///  i32 val = BSWAP(*((i32)a))
4942
///
4943
/// TODO: This rule matches complex patterns with OR node roots and doesn't
4944
/// interact well with the worklist mechanism. When a part of the pattern is
4945
/// updated (e.g. one of the loads) its direct users are put into the worklist,
4946
/// but the root node of the pattern which triggers the load combine is not
4947
/// necessarily a direct user of the changed node. For example, once the address
4948
/// of t28 load is reassociated load combine won't be triggered:
4949
///             t25: i32 = add t4, Constant:i32<2>
4950
///           t26: i64 = sign_extend t25
4951
///        t27: i64 = add t2, t26
4952
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4953
///     t29: i32 = zero_extend t28
4954
///   t32: i32 = shl t29, Constant:i8<8>
4955
/// t33: i32 = or t23, t32
4956
/// As a possible fix visitLoad can check if the load can be a part of a load
4957
/// combine pattern and add corresponding OR roots to the worklist.
4958
251k
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4959
251k
  assert(N->getOpcode() == ISD::OR &&
4960
251k
         "Can only match load combining against OR nodes");
4961
251k
4962
251k
  // Handles simple types only
4963
251k
  EVT VT = N->getValueType(0);
4964
251k
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4965
27.0k
    return SDValue();
4966
224k
  unsigned ByteWidth = VT.getSizeInBits() / 8;
4967
224k
4968
224k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4969
224k
  // Before legalize we can introduce too wide illegal loads which will be later
4970
224k
  // split into legal sized loads. This enables us to combine i64 load by i8
4971
224k
  // patterns to a couple of i32 loads on 32 bit targets.
4972
224k
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4973
12.6k
    return SDValue();
4974
211k
4975
211k
  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4976
34.2k
    unsigned BW, unsigned i) { return i; };
4977
211k
  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4978
8.39k
    unsigned BW, unsigned i) { return BW - i - 1; };
4979
211k
4980
211k
  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4981
27.0k
  auto MemoryByteOffset = [&] (ByteProvider P) {
4982
27.0k
    assert(P.isMemory() && "Must be a memory byte provider");
4983
27.0k
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4984
27.0k
    assert(LoadBitWidth % 8 == 0 &&
4985
27.0k
           "can only analyze providers for individual bytes not bit");
4986
27.0k
    unsigned LoadByteWidth = LoadBitWidth / 8;
4987
27.0k
    return IsBigEndianTarget
4988
606
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4989
26.4k
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4990
27.0k
  };
4991
211k
4992
211k
  Optional<BaseIndexOffset> Base;
4993
211k
  SDValue Chain;
4994
211k
4995
211k
  SmallSet<LoadSDNode *, 8> Loads;
4996
211k
  Optional<ByteProvider> FirstByteProvider;
4997
211k
  int64_t FirstOffset = INT64_MAX;
4998
211k
4999
211k
  // Check if all the bytes of the OR we are looking at are loaded from the same
5000
211k
  // base address. Collect bytes offsets from Base address in ByteOffsets.
5001
211k
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5002
236k
  for (unsigned i = 0; i < ByteWidth; i++) {
5003
235k
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5004
235k
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5005
206k
      return SDValue();
5006
28.5k
5007
28.5k
    LoadSDNode *L = P->Load;
5008
28.5k
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5009
28.5k
           "Must be enforced by calculateByteProvider");
5010
28.5k
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5011
28.5k
5012
28.5k
    // All loads must share the same chain
5013
28.5k
    SDValue LChain = L->getChain();
5014
28.5k
    if (!Chain)
5015
9.42k
      Chain = LChain;
5016
19.1k
    else if (Chain != LChain)
5017
1.95k
      return SDValue();
5018
26.6k
5019
26.6k
    // Loads must share the same base address
5020
26.6k
    BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
5021
26.6k
    int64_t ByteOffsetFromBase = 0;
5022
26.6k
    if (!Base)
5023
9.42k
      Base = Ptr;
5024
17.1k
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5025
1.32k
      return SDValue();
5026
25.3k
5027
25.3k
    // Calculate the offset of the current byte from the base address
5028
25.3k
    ByteOffsetFromBase += MemoryByteOffset(*P);
5029
25.3k
    ByteOffsets[i] = ByteOffsetFromBase;
5030
25.3k
5031
25.3k
    // Remember the first byte load
5032
25.3k
    if (ByteOffsetFromBase < FirstOffset) {
5033
12.3k
      FirstByteProvider = P;
5034
12.3k
      FirstOffset = ByteOffsetFromBase;
5035
12.3k
    }
5036
235k
5037
235k
    Loads.insert(L);
5038
235k
  }
5039
1.78k
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5040
1.78k
         "memory, so there must be at least one load which produces the value");
5041
1.78k
  assert(Base && "Base address of the accessed memory location must be set");
5042
1.78k
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5043
1.78k
5044
1.78k
  // Check if the bytes of the OR we are looking at match with either big or
5045
1.78k
  // little endian value load
5046
1.78k
  bool BigEndian = true, LittleEndian = true;
5047
9.55k
  for (unsigned i = 0; i < ByteWidth; i++) {
5048
7.78k
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5049
7.78k
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5050
7.78k
    BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5051
7.78k
    if (!BigEndian && !LittleEndian)
5052
22
      return SDValue();
5053
7.78k
  }
5054
1.76k
  assert((BigEndian != LittleEndian) && "should be either or");
5055
1.76k
  assert(FirstByteProvider && "must be set");
5056
1.76k
5057
1.76k
  // Ensure that the first byte is loaded from zero offset of the first load.
5058
1.76k
  // So the combined value can be loaded from the first load address.
5059
1.76k
  if (MemoryByteOffset(*FirstByteProvider) != 0)
5060
6
    return SDValue();
5061
1.75k
  LoadSDNode *FirstLoad = FirstByteProvider->Load;
5062
1.75k
5063
1.75k
  // The node we are looking at matches with the pattern, check if we can
5064
1.75k
  // replace it with a single load and bswap if needed.
5065
1.75k
5066
1.75k
  // If the load needs byte swap check if the target supports it
5067
1.75k
  bool NeedsBswap = IsBigEndianTarget != BigEndian;
5068
1.75k
5069
1.75k
  // Before legalize we can introduce illegal bswaps which will be later
5070
1.75k
  // converted to an explicit bswap sequence. This way we end up with a single
5071
1.75k
  // load and byte shuffling instead of several loads and byte shuffling.
5072
1.75k
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5073
0
    return SDValue();
5074
1.75k
5075
1.75k
  // Check that a load of the wide type is both allowed and fast on the target
5076
1.75k
  bool Fast = false;
5077
1.75k
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5078
1.75k
                                        VT, FirstLoad->getAddressSpace(),
5079
1.75k
                                        FirstLoad->getAlignment(), &Fast);
5080
1.75k
  if (!Allowed || !Fast)
5081
1.00k
    return SDValue();
5082
756
5083
756
  SDValue NewLoad =
5084
756
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5085
756
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5086
756
5087
756
  // Transfer chain users from old loads to the new load.
5088
756
  for (LoadSDNode *L : Loads)
5089
2.97k
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5090
756
5091
756
  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5092
251k
}
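
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged C++ sketch of the byte-by-byte load pattern described in the comment above. On a little-endian target the OR tree below is what MatchLoadCombine collapses into a single i32 load (the big-endian arrangement additionally gets a BSWAP). The function name is hypothetical.

#include <cstdint>

// Little-endian read of a 32-bit value one byte at a time.  Each a[i] becomes
// a narrow LoadSDNode; calculateByteProvider maps every byte of the OR tree to
// one of those loads, and the whole tree is replaced by a single wide load.
uint32_t load_le32(const uint8_t *a) {
  return (uint32_t)a[0]         |
         ((uint32_t)a[1] << 8)  |
         ((uint32_t)a[2] << 16) |
         ((uint32_t)a[3] << 24);
}
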
5093
5094
874k
SDValue DAGCombiner::visitXOR(SDNode *N) {
5095
874k
  SDValue N0 = N->getOperand(0);
5096
874k
  SDValue N1 = N->getOperand(1);
5097
874k
  EVT VT = N0.getValueType();
5098
874k
5099
874k
  // fold vector ops
5100
874k
  if (VT.isVector()) {
5101
19.0k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
5102
1
      return FoldedVOp;
5103
19.0k
5104
19.0k
    // fold (xor x, 0) -> x, vector edition
5105
19.0k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
5106
2
      return N1;
5107
19.0k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
5108
7
      return N0;
5109
874k
  }
5110
874k
5111
874k
  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5112
874k
  if (N0.isUndef() && N1.isUndef())
5113
0
    return DAG.getConstant(0, SDLoc(N), VT);
5114
874k
  // fold (xor x, undef) -> undef
5115
874k
  if (N0.isUndef())
5116
0
    return N0;
5117
874k
  if (N1.isUndef())
5118
0
    return N1;
5119
874k
  // fold (xor c1, c2) -> c1^c2
5120
874k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5121
874k
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5122
874k
  if (N0C && N1C)
5123
868
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5124
873k
  // canonicalize constant to RHS
5125
873k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5126
10
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5127
9
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5128
873k
  // fold (xor x, 0) -> x
5129
873k
  if (isNullConstant(N1))
5130
0
    return N0;
5131
873k
5132
873k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
5133
5
    return NewSel;
5134
873k
5135
873k
  // reassociate xor
5136
873k
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5137
350
    return RXOR;
5138
872k
5139
872k
  // fold !(x cc y) -> (x !cc y)
5140
872k
  SDValue LHS, RHS, CC;
5141
872k
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5142
764k
    bool isInt = LHS.getValueType().isInteger();
5143
764k
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5144
764k
                                               isInt);
5145
764k
5146
764k
    if (!LegalOperations ||
5147
764k
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5148
764k
      switch (N0.getOpcode()) {
5149
0
      default:
5150
0
        llvm_unreachable("Unhandled SetCC Equivalent!");
5151
764k
      case ISD::SETCC:
5152
764k
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5153
0
      case ISD::SELECT_CC:
5154
0
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5155
0
                               N0.getOperand(3), NotCC);
5156
108k
      }
5157
108k
    }
5158
764k
  }
5159
108k
5160
108k
  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5161
108k
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5162
16
      N0.getNode()->hasOneUse() &&
5163
108k
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5164
3
    SDValue V = N0.getOperand(0);
5165
3
    SDLoc DL(N0);
5166
3
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5167
3
                    DAG.getConstant(1, DL, V.getValueType()));
5168
3
    AddToWorklist(V.getNode());
5169
3
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5170
3
  }
5171
108k
5172
108k
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5173
108k
  if (isOneConstant(N1) && VT == MVT::i1 &&
5174
108k
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5175
97
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5176
97
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5177
73
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5178
73
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5179
73
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5180
73
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5181
73
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5182
73
    }
5183
107k
  }
5184
107k
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5185
107k
  if (isAllOnesConstant(N1) &&
5186
107k
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5187
818
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5188
818
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5189
108
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5190
108
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5191
108
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5192
108
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5193
108
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5194
108
    }
5195
107k
  }
5196
107k
  // fold (xor (and x, y), y) -> (and (not x), y)
5197
107k
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5198
107k
      N0->getOperand(1) == N1) {
5199
543
    SDValue X = N0->getOperand(0);
5200
543
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5201
543
    AddToWorklist(NotX.getNode());
5202
543
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5203
543
  }
5204
107k
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
5205
107k
  if (N1C && N0.getOpcode() == ISD::XOR) {
5206
344
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
5207
0
      SDLoc DL(N);
5208
0
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
5209
0
                         DAG.getConstant(N1C->getAPIntValue() ^
5210
0
                                         N00C->getAPIntValue(), DL, VT));
5211
0
    }
5212
344
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
5213
0
      SDLoc DL(N);
5214
0
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
5215
0
                         DAG.getConstant(N1C->getAPIntValue() ^
5216
0
                                         N01C->getAPIntValue(), DL, VT));
5217
0
    }
5218
107k
  }
5219
107k
5220
107k
  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5221
107k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
5222
107k
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5223
107k
      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5224
107k
      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5225
231
    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5226
229
      if (C->getAPIntValue() == (OpSizeInBits - 1))
5227
229
        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5228
107k
  }
5229
107k
5230
107k
  // fold (xor x, x) -> 0
5231
107k
  if (N0 == N1)
5232
13
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5233
107k
5234
107k
  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5235
107k
  // Here is a concrete example of this equivalence:
5236
107k
  // i16   x ==  14
5237
107k
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5238
107k
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5239
107k
  //
5240
107k
  // =>
5241
107k
  //
5242
107k
  // i16     ~1      == 0b1111111111111110
5243
107k
  // i16 rol(~1, 14) == 0b1011111111111111
5244
107k
  //
5245
107k
  // Some additional tips to help conceptualize this transform:
5246
107k
  // - Try to see the operation as placing a single zero in a value of all ones.
5247
107k
  // - There exists no value for x which would allow the result to contain zero.
5248
107k
  // - Values of x larger than the bitwidth are undefined and do not require a
5249
107k
  //   consistent result.
5250
107k
  // - Pushing the zero left requires shifting one bits in from the right.
5251
107k
  // A rotate left of ~1 is a nice way of achieving the desired result.
5252
107k
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5253
107k
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5254
185
    SDLoc DL(N);
5255
185
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5256
185
                       N0.getOperand(1));
5257
185
  }
5258
106k
5259
106k
  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5260
106k
  if (N0.getOpcode() == N1.getOpcode())
5261
15.2k
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5262
409
      return Tmp;
5263
106k
5264
106k
  // Simplify the expression using non-local knowledge.
5265
106k
  if (SimplifyDemandedBits(SDValue(N, 0)))
5266
405
    return SDValue(N, 0);
5267
106k
5268
106k
  return SDValue();
5269
106k
}
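
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged sketch of the integer-abs idiom matched by the `xor (add (X, Y), Y)` fold above, where Y is `sra X, size(X)-1`. The function name is hypothetical.

#include <cstdint>

// Branchless absolute value: y is an all-ones or all-zeros mask derived from
// the sign bit, so (x + y) ^ y negates x only when x is negative.  The fold
// above rewrites this DAG into a single ISD::ABS node when that is legal.
int32_t abs_idiom(int32_t x) {
  int32_t y = x >> 31;   // sra x, 31
  return (x + y) ^ y;    // xor (add x, y), y  ->  abs x
}
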
5270
5271
/// Handle transforms common to the three shifts, when the shift amount is a
5272
/// constant.
5273
987k
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5274
987k
  SDNode *LHS = N->getOperand(0).getNode();
5275
987k
  if (!LHS->hasOneUse()) return SDValue();
5276
488k
5277
488k
  // We want to pull some binops through shifts, so that we have (and (shift))
5278
488k
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5279
488k
  // thing happens with address calculations, so it's important to canonicalize
5280
488k
  // it.
5281
488k
  bool HighBitSet = false;  // Can we transform this if the high bit is set?
5282
488k
5283
488k
  switch (LHS->getOpcode()) {
5284
440k
  default: return SDValue();
5285
3.05k
  case ISD::OR:
5286
3.05k
  case ISD::XOR:
5287
3.05k
    HighBitSet = false; // We can only transform sra if the high bit is clear.
5288
3.05k
    break;
5289
16.0k
  case ISD::AND:
5290
16.0k
    HighBitSet = true;  // We can only transform sra if the high bit is set.
5291
16.0k
    break;
5292
28.7k
  case ISD::ADD:
5293
28.7k
    if (N->getOpcode() != ISD::SHL)
5294
18.7k
      return SDValue(); // only shl(add) not sr[al](add).
5295
9.94k
    HighBitSet = false; // We can only transform sra if the high bit is clear.
5296
9.94k
    break;
5297
29.0k
  }
5298
29.0k
5299
29.0k
  // We require the RHS of the binop to be a constant and not opaque as well.
5300
29.0k
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5301
29.0k
  if (!BinOpCst) return SDValue();
5302
15.9k
5303
15.9k
  // FIXME: disable this unless the input to the binop is a shift by a constant
5304
15.9k
  // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5305
15.9k
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5306
15.9k
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5307
15.7k
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
5308
15.7k
                 BinOpLHSVal->getOpcode() == ISD::SRL;
5309
15.9k
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5310
14.8k
                        BinOpLHSVal->getOpcode() == ISD::SELECT;
5311
15.9k
5312
15.9k
  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5313
13.5k
      !isCopyOrSelect)
5314
12.3k
    return SDValue();
5315
3.58k
5316
3.58k
  if (isCopyOrSelect && N->hasOneUse())
5317
1.09k
    return SDValue();
5318
2.49k
5319
2.49k
  EVT VT = N->getValueType(0);
5320
2.49k
5321
2.49k
  // If this is a signed shift right, and the high bit is modified by the
5322
2.49k
  // logical operation, do not perform the transformation. The highBitSet
5323
2.49k
  // boolean indicates the value of the high bit of the constant which would
5324
2.49k
  // cause it to be modified for this operation.
5325
2.49k
  if (N->getOpcode() == ISD::SRA) {
5326
24
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5327
24
    if (BinOpRHSSignSet != HighBitSet)
5328
22
      return SDValue();
5329
2.47k
  }
5330
2.47k
5331
2.47k
  if (!TLI.isDesirableToCommuteWithShift(LHS))
5332
1.64k
    return SDValue();
5333
822
5334
822
  // Fold the constants, shifting the binop RHS by the shift amount.
5335
822
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5336
822
                               N->getValueType(0),
5337
822
                               LHS->getOperand(1), N->getOperand(1));
5338
822
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5339
822
5340
822
  // Create the new shift.
5341
822
  SDValue NewShift = DAG.getNode(N->getOpcode(),
5342
822
                                 SDLoc(LHS->getOperand(0)),
5343
822
                                 VT, LHS->getOperand(0), N->getOperand(1));
5344
822
5345
822
  // Create the new binop.
5346
822
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5347
822
}
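
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged sketch of the general shape of the canonicalization described above, namely pulling a binop through a shift in address-style computations, subject to the profitability checks in the code. The function name is hypothetical.

#include <cstdint>

// The DAG for the return expression is (shl (add x, 3), 2).  When the checks
// above allow it, the add is pulled through the shift so the result becomes
// (add (shl x, 2), 12), i.e. (binop (shift x, c), folded-constant).
uint32_t scaled_index(uint32_t x) {
  return (x + 3u) << 2;
}
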
5348
5349
676
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5350
676
  assert(N->getOpcode() == ISD::TRUNCATE);
5351
676
  assert(N->getOperand(0).getOpcode() == ISD::AND);
5352
676
5353
676
  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5354
676
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5355
259
    SDValue N01 = N->getOperand(0).getOperand(1);
5356
259
    if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5357
256
      SDLoc DL(N);
5358
256
      EVT TruncVT = N->getValueType(0);
5359
256
      SDValue N00 = N->getOperand(0).getOperand(0);
5360
256
      SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5361
256
      SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5362
256
      AddToWorklist(Trunc00.getNode());
5363
256
      AddToWorklist(Trunc01.getNode());
5364
256
      return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5365
256
    }
5366
420
  }
5367
420
5368
420
  return SDValue();
5369
420
}
5370
5371
5.68k
SDValue DAGCombiner::visitRotate(SDNode *N) {
5372
5.68k
  SDLoc dl(N);
5373
5.68k
  SDValue N0 = N->getOperand(0);
5374
5.68k
  SDValue N1 = N->getOperand(1);
5375
5.68k
  EVT VT = N->getValueType(0);
5376
5.68k
  unsigned Bitsize = VT.getScalarSizeInBits();
5377
5.68k
5378
5.68k
  // fold (rot x, 0) -> x
5379
5.68k
  if (isNullConstantOrNullSplatConstant(N1))
5380
2
    return N0;
5381
5.68k
5382
5.68k
  // fold (rot x, c) -> (rot x, c % BitSize)
5383
5.68k
  
if (ConstantSDNode *5.68k
Cst5.68k
= isConstOrConstSplat(N1)) {
5384
4.17k
    if (Cst->getAPIntValue().uge(Bitsize)) {
5385
2
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
5386
2
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
5387
2
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
5388
2
    }
5389
5.67k
  }
5390
5.67k
5391
5.67k
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5392
5.67k
  if (N1.getOpcode() == ISD::TRUNCATE &&
5393
5.67k
      N1.getOperand(0).getOpcode() == ISD::AND) {
5394
24
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5395
24
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
5396
5.65k
  }
5397
5.65k
5398
5.65k
  unsigned NextOp = N0.getOpcode();
5399
5.65k
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
5400
5.65k
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
5401
11
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
5402
11
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
5403
11
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
5404
11
      EVT ShiftVT = C1->getValueType(0);
5405
11
      bool SameSide = (N->getOpcode() == NextOp);
5406
11
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5407
11
      if (SDValue CombinedShift =
5408
11
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
5409
11
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
5410
11
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5411
11
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
5412
11
            BitsizeC.getNode());
5413
11
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
5414
11
                           CombinedShiftNorm);
5415
11
      }
5416
5.64k
    }
5417
11
  }
5418
5.64k
  return SDValue();
5419
5.64k
}
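
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged sketch of the `(rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)` fold above, written with the portable rotate idiom. The helper and function names are hypothetical.

#include <cstdint>

static inline uint32_t rotl32(uint32_t x, unsigned n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}

// Two constant rotates by 5 and then 9; once both are matched as ISD::ROTL,
// the fold above merges them into a single rotate by (5 + 9) % 32 = 14.
uint32_t double_rot(uint32_t x) {
  return rotl32(rotl32(x, 5), 9);
}
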
5420
5421
740k
SDValue DAGCombiner::visitSHL(SDNode *N) {
5422
740k
  SDValue N0 = N->getOperand(0);
5423
740k
  SDValue N1 = N->getOperand(1);
5424
740k
  EVT VT = N0.getValueType();
5425
740k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
5426
740k
5427
740k
  // fold vector ops
5428
740k
  if (VT.isVector()) {
5429
4.56k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
5430
4
      return FoldedVOp;
5431
4.55k
5432
4.55k
    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5433
4.55k
    // If setcc produces all-one true value then:
5434
4.55k
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5435
4.55k
    if (N1CV && N1CV->isConstant()) {
5436
2.92k
      if (N0.getOpcode() == ISD::AND) {
5437
26
        SDValue N00 = N0->getOperand(0);
5438
26
        SDValue N01 = N0->getOperand(1);
5439
26
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5440
26
5441
26
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5442
12
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5443
26
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
5444
12
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5445
12
                                                     N01CV, N1CV))
5446
12
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5447
739k
        }
5448
26
      }
5449
2.92k
    }
5450
4.56k
  }
5451
739k
5452
739k
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5453
739k
5454
739k
  // fold (shl c1, c2) -> c1<<c2
5455
739k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5456
739k
  if (N0C && N1C && !N1C->isOpaque())
5457
3.67k
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5458
736k
  // fold (shl 0, x) -> 0
5459
736k
  if (isNullConstantOrNullSplatConstant(N0))
5460
193
    return N0;
5461
736k
  // fold (shl x, c >= size(x)) -> undef
5462
736k
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5463
736k
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5464
690k
    return Val->getAPIntValue().uge(OpSizeInBits);
5465
690k
  };
5466
736k
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
5467
43
    return DAG.getUNDEF(VT);
5468
736k
  // fold (shl x, 0) -> x
5469
736k
  if (N1C && N1C->isNullValue())
5470
414
    return N0;
5471
735k
  // fold (shl undef, x) -> 0
5472
735k
  if (N0.isUndef())
5473
28
    return DAG.getConstant(0, SDLoc(N), VT);
5474
735k
5475
735k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
5476
56
    return NewSel;
5477
735k
5478
735k
  // if (shl x, c) is known to be zero, return 0
5479
735k
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
5480
735k
                            APInt::getAllOnesValue(OpSizeInBits)))
5481
2.07k
    return DAG.getConstant(0, SDLoc(N), VT);
5482
733k
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5483
733k
  if (N1.getOpcode() == ISD::TRUNCATE &&
5484
733k
      N1.getOperand(0).getOpcode() == ISD::AND) {
5485
568
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5486
163
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5487
733k
  }
5488
733k
5489
733k
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5490
5.96k
    return SDValue(N, 0);
5491
727k
5492
727k
  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5493
727k
  if (N0.getOpcode() == ISD::SHL) {
5494
886
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5495
460
                                          ConstantSDNode *RHS) {
5496
460
      APInt c1 = LHS->getAPIntValue();
5497
460
      APInt c2 = RHS->getAPIntValue();
5498
460
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5499
460
      return (c1 + c2).uge(OpSizeInBits);
5500
460
    };
5501
886
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5502
4
      return DAG.getConstant(0, SDLoc(N), VT);
5503
882
5504
882
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5505
463
                                       ConstantSDNode *RHS) {
5506
463
      APInt c1 = LHS->getAPIntValue();
5507
463
      APInt c2 = RHS->getAPIntValue();
5508
463
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5509
463
      return (c1 + c2).ult(OpSizeInBits);
5510
463
    };
5511
882
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5512
448
      SDLoc DL(N);
5513
448
      EVT ShiftVT = N1.getValueType();
5514
448
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5515
448
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
5516
448
    }
5517
726k
  }
5518
726k
5519
726k
  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5520
726k
  // For this to be valid, the second form must not preserve any of the bits
5521
726k
  // that are shifted out by the inner shift in the first form.  This means
5522
726k
  // the outer shift size must be >= the number of bits added by the ext.
5523
726k
  // As a corollary, we don't care what kind of ext it is.
5524
726k
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5525
627k
              N0.getOpcode() == ISD::ANY_EXTEND ||
5526
680k
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
5527
726k
      N0.getOperand(0).getOpcode() == ISD::SHL) {
5528
887
    SDValue N0Op0 = N0.getOperand(0);
5529
887
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5530
807
      APInt c1 = N0Op0C1->getAPIntValue();
5531
807
      APInt c2 = N1C->getAPIntValue();
5532
807
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5533
807
5534
807
      EVT InnerShiftVT = N0Op0.getValueType();
5535
807
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5536
807
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5537
21
        SDLoc DL(N0);
5538
21
        APInt Sum = c1 + c2;
5539
21
        if (Sum.uge(OpSizeInBits))
5540
4
          return DAG.getConstant(0, DL, VT);
5541
17
5542
17
        return DAG.getNode(
5543
17
            ISD::SHL, DL, VT,
5544
17
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5545
17
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5546
17
      }
5547
807
    }
5548
887
  }
5549
726k
5550
726k
  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5551
726k
  // Only fold this if the inner zext has no other uses to avoid increasing
5552
726k
  // the total number of instructions.
5553
726k
  
if (726k
N1C && 726k
N0.getOpcode() == ISD::ZERO_EXTEND680k
&&
N0.hasOneUse()52.6k
&&
5554
726k
      
N0.getOperand(0).getOpcode() == ISD::SRL39.3k
) {
5555
1.37k
    SDValue N0Op0 = N0.getOperand(0);
5556
1.37k
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5557
1.19k
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5558
1.19k
        uint64_t c1 = N0Op0C1->getZExtValue();
5559
1.19k
        uint64_t c2 = N1C->getZExtValue();
5560
1.19k
        if (c1 == c2) {
5561
50
          SDValue NewOp0 = N0.getOperand(0);
5562
50
          EVT CountVT = NewOp0.getOperand(1).getValueType();
5563
50
          SDLoc DL(N);
5564
50
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5565
50
                                       NewOp0,
5566
50
                                       DAG.getConstant(c2, DL, CountVT));
5567
50
          AddToWorklist(NewSHL.getNode());
5568
50
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5569
50
        }
5570
726k
      }
5571
1.19k
    }
5572
1.37k
  }
5573
726k
5574
726k
  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5575
726k
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5576
726k
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5577
726k
      N0->getFlags().hasExact()) {
5578
661
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5579
653
      uint64_t C1 = N0C1->getZExtValue();
5580
653
      uint64_t C2 = N1C->getZExtValue();
5581
653
      SDLoc DL(N);
5582
653
      if (C1 <= C2)
5583
56
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5584
56
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5585
597
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5586
597
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5587
597
    }
5588
661
  }
5589
726k
5590
726k
  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5591
726k
  //                               (and (srl x, (sub c1, c2), MASK)
5592
726k
  // Only fold this if the inner shift has no other uses -- if it does, folding
5593
726k
  // this will increase the total number of instructions.
5594
726k
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5595
2.06k
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5596
1.99k
      uint64_t c1 = N0C1->getZExtValue();
5597
1.99k
      if (c1 < OpSizeInBits) {
5598
1.99k
        uint64_t c2 = N1C->getZExtValue();
5599
1.99k
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5600
1.99k
        SDValue Shift;
5601
1.99k
        if (c2 > c1) {
5602
236
          Mask <<= c2 - c1;
5603
236
          SDLoc DL(N);
5604
236
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5605
236
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5606
1.99k
        } else {
5607
1.76k
          Mask.lshrInPlace(c1 - c2);
5608
1.76k
          SDLoc DL(N);
5609
1.76k
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5610
1.76k
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5611
1.76k
        }
5612
1.99k
        SDLoc DL(N0);
5613
1.99k
        return DAG.getNode(ISD::AND, DL, VT, Shift,
5614
1.99k
                           DAG.getConstant(Mask, DL, VT));
5615
1.99k
      }
5616
724k
    }
5617
2.06k
  }
5618
724k
5619
724k
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5620
724k
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5621
724k
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5622
163
    SDLoc DL(N);
5623
163
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5624
163
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5625
163
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5626
163
  }
5627
724k
5628
724k
  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5629
724k
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
5630
724k
  // Variant of version done on multiply, except mul by a power of 2 is turned
5631
724k
  // into a shift.
5632
724k
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
5633
31.3k
      N0.getNode()->hasOneUse() &&
5634
23.2k
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5635
724k
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5636
11.2k
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5637
11.2k
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5638
11.2k
    AddToWorklist(Shl0.getNode());
5639
11.2k
    AddToWorklist(Shl1.getNode());
5640
11.2k
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
5641
11.2k
  }
5642
712k
5643
712k
  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5644
712k
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5645
1.69k
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5646
712k
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5647
334
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5648
334
    if (isConstantOrConstantVector(Shl))
5649
334
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5650
712k
  }
5651
712k
5652
712k
  if (N1C && !N1C->isOpaque())
5653
666k
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5654
186
      return NewSHL;
5655
712k
5656
712k
  return SDValue();
5657
712k
}
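
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged sketch of the `(shl (srl x, c1), c2)` case handled above; with equal shift amounts the pair degenerates to masking off the low bits. The function name is hypothetical.

#include <cstdint>

// (x >> 4) << 4 is built as (shl (srl x, 4), 4); per the fold above it is
// rewritten as an AND with a mask that clears the four low bits.
uint32_t align_down_16(uint32_t x) {
  return (x >> 4) << 4;   // becomes x & 0xFFFFFFF0
}
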
5658
5659
86.4k
SDValue DAGCombiner::visitSRA(SDNode *N) {
5660
86.4k
  SDValue N0 = N->getOperand(0);
5661
86.4k
  SDValue N1 = N->getOperand(1);
5662
86.4k
  EVT VT = N0.getValueType();
5663
86.4k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
5664
86.4k
5665
86.4k
  // Arithmetic shifting an all-sign-bit value is a no-op.
5666
86.4k
  // fold (sra 0, x) -> 0
5667
86.4k
  // fold (sra -1, x) -> -1
5668
86.4k
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5669
448
    return N0;
5670
86.0k
5671
86.0k
  // fold vector ops
5672
86.0k
  
if (86.0k
VT.isVector()86.0k
)
5673
3.27k
    
if (SDValue 3.27k
FoldedVOp3.27k
= SimplifyVBinOp(N))
5674
2
      return FoldedVOp;
5675
86.0k
5676
86.0k
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5677
86.0k
5678
86.0k
  // fold (sra c1, c2) -> (sra c1, c2)
5679
86.0k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5680
86.0k
  if (N0C && N1C && !N1C->isOpaque())
5681
0
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5682
86.0k
  // fold (sra x, c >= size(x)) -> undef
5683
86.0k
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5684
86.0k
  
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) 86.0k
{
5685
78.3k
    return Val->getAPIntValue().uge(OpSizeInBits);
5686
78.3k
  };
5687
86.0k
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
5688
21
    return DAG.getUNDEF(VT);
5689
86.0k
  // fold (sra x, 0) -> x
5690
86.0k
  
if (86.0k
N1C && 86.0k
N1C->isNullValue()78.1k
)
5691
8
    return N0;
5692
86.0k
5693
86.0k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
5694
2
    return NewSel;
5695
86.0k
5696
86.0k
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5697
86.0k
  // sext_inreg.
5698
86.0k
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5699
12.2k
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5700
12.2k
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5701
12.2k
    if (VT.isVector())
5702
126
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
5703
126
                               ExtVT, VT.getVectorNumElements());
5704
12.2k
    if ((!LegalOperations ||
5705
1.73k
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5706
10.4k
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5707
10.4k
                         N0.getOperand(0), DAG.getValueType(ExtVT));
5708
75.5k
  }
5709
75.5k
5710
75.5k
  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5711
75.5k
  if (N0.getOpcode() == ISD::SRA) {
5712
1.19k
    SDLoc DL(N);
5713
1.19k
    EVT ShiftVT = N1.getValueType();
5714
1.19k
5715
1.19k
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5716
947
                                          ConstantSDNode *RHS) {
5717
947
      APInt c1 = LHS->getAPIntValue();
5718
947
      APInt c2 = RHS->getAPIntValue();
5719
947
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5720
947
      return (c1 + c2).uge(OpSizeInBits);
5721
947
    };
5722
1.19k
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5723
925
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
5724
925
                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
5725
268
5726
268
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5727
27
                                       ConstantSDNode *RHS) {
5728
27
      APInt c1 = LHS->getAPIntValue();
5729
27
      APInt c2 = RHS->getAPIntValue();
5730
27
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5731
27
      return (c1 + c2).ult(OpSizeInBits);
5732
27
    };
5733
268
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5734
10
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5735
10
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
5736
10
    }
5737
74.5k
  }
5738
74.5k
5739
74.5k
  // fold (sra (shl X, m), (sub result_size, n))
5740
74.5k
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5741
74.5k
  // result_size - n != m.
5742
74.5k
  // If truncate is free for the target sext(shl) is likely to result in better
5743
74.5k
  // code.
5744
74.5k
  if (N0.getOpcode() == ISD::SHL && N1C) {
5745
4.97k
    // Get the two constants of the shifts, CN0 = m, CN = n.
5746
4.97k
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5747
4.97k
    if (N01C) {
5748
4.84k
      LLVMContext &Ctx = *DAG.getContext();
5749
4.84k
      // Determine what the truncate's result bitsize and type would be.
5750
4.84k
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5751
4.84k
5752
4.84k
      if (VT.isVector())
5753
43
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5754
4.84k
5755
4.84k
      // Determine the residual right-shift amount.
5756
4.84k
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5757
4.84k
5758
4.84k
      // If the shift is not a no-op (in which case this should be just a sign
5759
4.84k
      // extend already), the truncated to type is legal, sign_extend is legal
5760
4.84k
      // on that type, and the truncate to that type is both legal and free,
5761
4.84k
      // perform the transform.
5762
4.84k
      if ((ShiftAmt > 0) &&
5763
2.19k
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5764
154
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5765
4.84k
          TLI.isTruncateFree(VT, TruncVT)) {
5766
84
        SDLoc DL(N);
5767
84
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5768
84
            getShiftAmountTy(N0.getOperand(0).getValueType()));
5769
84
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5770
84
                                    N0.getOperand(0), Amt);
5771
84
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5772
84
                                    Shift);
5773
84
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
5774
84
                           N->getValueType(0), Trunc);
5775
84
      }
5776
74.5k
    }
5777
4.97k
  }
5778
74.5k
5779
74.5k
  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5780
74.5k
  if (N1.getOpcode() == ISD::TRUNCATE &&
5781
74.5k
      N1.getOperand(0).getOpcode() == ISD::AND) {
5782
15
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5783
13
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5784
74.4k
  }
5785
74.4k
5786
74.4k
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5787
74.4k
  //      if c1 is equal to the number of bits the trunc removes
5788
74.4k
  if (N0.getOpcode() == ISD::TRUNCATE &&
5789
6.25k
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
5790
6.25k
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
5791
3.26k
      N0.getOperand(0).hasOneUse() &&
5792
2.84k
      N0.getOperand(0).getOperand(1).hasOneUse() &&
5793
74.4k
      N1C) {
5794
408
    SDValue N0Op0 = N0.getOperand(0);
5795
408
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5796
408
      unsigned LargeShiftVal = LargeShift->getZExtValue();
5797
408
      EVT LargeVT = N0Op0.getValueType();
5798
408
5799
408
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5800
401
        SDLoc DL(N);
5801
401
        SDValue Amt =
5802
401
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5803
401
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5804
401
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5805
401
                                  N0Op0.getOperand(0), Amt);
5806
401
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5807
401
      }
5808
74.0k
    }
5809
408
  }
5810
74.0k
5811
74.0k
  // Simplify, based on bits shifted out of the LHS.
5812
74.0k
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5813
1.18k
    return SDValue(N, 0);
5814
72.9k
5815
72.9k
  // If the sign bit is known to be zero, switch this to a SRL.
5816
72.9k
  if (DAG.SignBitIsZero(N0))
5817
117
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5818
72.7k
5819
72.7k
  if (N1C && !N1C->isOpaque())
5820
65.0k
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5821
2
      return NewSRA;
5822
72.7k
5823
72.7k
  return SDValue();
5824
72.7k
}
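
Illustrative example (not part of the DAGCombiner.cpp listing): a hedged sketch of the shl/sra pair that the `(sra (shl x, c1), c1)` fold above turns into a SIGN_EXTEND_INREG when the target supports it. The function name is hypothetical; the cast avoids signed-overflow issues in the C++ source while producing the same DAG shape.

#include <cstdint>

// Sign-extend the low 8 bits of x: the DAG is (sra (shl x, 24), 24), which
// the fold above replaces with sign_extend_inreg(x, i8).
int32_t sext_low_byte(int32_t x) {
  return (int32_t)((uint32_t)x << 24) >> 24;
}
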
5825
5826
303k
SDValue DAGCombiner::visitSRL(SDNode *N) {
5827
303k
  SDValue N0 = N->getOperand(0);
5828
303k
  SDValue N1 = N->getOperand(1);
5829
303k
  EVT VT = N0.getValueType();
5830
303k
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
5831
303k
5832
303k
  // fold vector ops
5833
303k
  if (VT.isVector())
5834
5.84k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
5835
2
      return FoldedVOp;
5836
303k
5837
303k
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5838
303k
5839
303k
  // fold (srl c1, c2) -> c1 >>u c2
5840
303k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5841
303k
  if (N0C && N1C && !N1C->isOpaque())
5842
2.51k
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5843
301k
  // fold (srl 0, x) -> 0
5844
301k
  if (isNullConstantOrNullSplatConstant(N0))
5845
64
    return N0;
5846
301k
  // fold (srl x, c >= size(x)) -> undef
5847
301k
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5848
301k
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5849
278k
    return Val->getAPIntValue().uge(OpSizeInBits);
5850
278k
  };
5851
301k
  if (matchUnaryPredicate(N1, MatchShiftTooBig))
5852
20
    return DAG.getUNDEF(VT);
5853
301k
  // fold (srl x, 0) -> x
5854
301k
  if (N1C && N1C->isNullValue())
5855
1.73k
    return N0;
5856
299k
5857
299k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
5858
3
    return NewSel;
5859
299k
5860
299k
  // if (srl x, c) is known to be zero, return 0
5861
299k
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5862
276k
                                   APInt::getAllOnesValue(OpSizeInBits)))
5863
342
    return DAG.getConstant(0, SDLoc(N), VT);
5864
299k
5865
299k
  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5866
299k
  if (N0.getOpcode() == ISD::SRL) {
5867
5.43k
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5868
4.38k
                                          ConstantSDNode *RHS) {
5869
4.38k
      APInt c1 = LHS->getAPIntValue();
5870
4.38k
      APInt c2 = RHS->getAPIntValue();
5871
4.38k
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5872
4.38k
      return (c1 + c2).uge(OpSizeInBits);
5873
4.38k
    };
5874
5.43k
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5875
4
      return DAG.getConstant(0, SDLoc(N), VT);
5876
5.43k
5877
5.43k
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5878
4.39k
                                       ConstantSDNode *RHS) {
5879
4.39k
      APInt c1 = LHS->getAPIntValue();
5880
4.39k
      APInt c2 = RHS->getAPIntValue();
5881
4.39k
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5882
4.39k
      return (c1 + c2).ult(OpSizeInBits);
5883
4.39k
    };
5884
5.43k
    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5885
4.37k
      SDLoc DL(N);
5886
4.37k
      EVT ShiftVT = N1.getValueType();
5887
4.37k
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5888
4.37k
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
5889
4.37k
    }
5890
294k
  }
5891
294k
5892
294k
  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5893
294k
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5894
294k
      N0.getOperand(0).getOpcode() == ISD::SRL) {
5895
4.83k
    if (auto 
N001C4.83k
= isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
5896
4.83k
      uint64_t c1 = N001C->getZExtValue();
5897
4.83k
      uint64_t c2 = N1C->getZExtValue();
5898
4.83k
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
5899
4.83k
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
5900
4.83k
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5901
4.83k
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5902
4.83k
      if (
c1 + OpSizeInBits == InnerShiftSize4.83k
) {
5903
4.23k
        SDLoc DL(N0);
5904
4.23k
        if (c1 + c2 >= InnerShiftSize)
5905
0
          return DAG.getConstant(0, DL, VT);
5906
4.23k
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
5907
4.23k
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5908
4.23k
                                       N0.getOperand(0).getOperand(0),
5909
4.23k
                                       DAG.getConstant(c1 + c2, DL,
5910
4.23k
                                                       ShiftCountVT)));
5911
4.23k
      }
5912
4.83k
    }
5913
4.83k
  }
5914
290k
5915
290k
  // fold (srl (shl x, c), c) -> (and x, cst2)
5916
290k
  
if (290k
N0.getOpcode() == ISD::SHL && 290k
N0.getOperand(1) == N11.01k
&&
5917
290k
      
isConstantOrConstantVector(N1, /* NoOpaques */ true)216
) {
5918
186
    SDLoc DL(N);
5919
186
    SDValue Mask =
5920
186
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5921
186
    AddToWorklist(Mask.getNode());
5922
186
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5923
186
  }
5924
290k
5925
290k
  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5926
290k
  
if (290k
N1C && 290k
N0.getOpcode() == ISD::ANY_EXTEND267k
) {
5927
1.23k
    // Shifting in all undef bits?
5928
1.23k
    EVT SmallVT = N0.getOperand(0).getValueType();
5929
1.23k
    unsigned BitSize = SmallVT.getScalarSizeInBits();
5930
1.23k
    if (N1C->getZExtValue() >= BitSize)
5931
0
      return DAG.getUNDEF(VT);
5932
1.23k
5933
1.23k
    
if (1.23k
!LegalTypes || 1.23k
TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)857
) {
5934
457
      uint64_t ShiftAmt = N1C->getZExtValue();
5935
457
      SDLoc DL0(N0);
5936
457
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5937
457
                                       N0.getOperand(0),
5938
457
                          DAG.getConstant(ShiftAmt, DL0,
5939
457
                                          getShiftAmountTy(SmallVT)));
5940
457
      AddToWorklist(SmallShift.getNode());
5941
457
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
5942
457
      SDLoc DL(N);
5943
457
      return DAG.getNode(ISD::AND, DL, VT,
5944
457
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5945
457
                         DAG.getConstant(Mask, DL, VT));
5946
457
    }
5947
289k
  }
5948
289k
5949
289k
  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5950
289k
  // bit, which is unmodified by sra.
5951
289k
  
if (289k
N1C && 289k
N1C->getZExtValue() + 1 == OpSizeInBits266k
) {
5952
17.2k
    if (N0.getOpcode() == ISD::SRA)
5953
1.13k
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5954
288k
  }
5955
288k
5956
288k
  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5957
288k
  
if (288k
N1C && 288k
N0.getOpcode() == ISD::CTLZ265k
&&
5958
288k
      
N1C->getAPIntValue() == Log2_32(OpSizeInBits)49
) {
5959
49
    KnownBits Known;
5960
49
    DAG.computeKnownBits(N0.getOperand(0), Known);
5961
49
5962
49
    // If any of the input bits are KnownOne, then the input couldn't be all
5963
49
    // zeros, thus the result of the srl will always be zero.
5964
49
    if (
Known.One.getBoolValue()49
)
return DAG.getConstant(0, SDLoc(N0), VT)0
;
5965
49
5966
49
    // If all of the bits input the to ctlz node are known to be zero, then
5967
49
    // the result of the ctlz is "32" and the result of the shift is one.
5968
49
    APInt UnknownBits = ~Known.Zero;
5969
49
    if (
UnknownBits == 049
)
return DAG.getConstant(1, SDLoc(N0), VT)0
;
5970
49
5971
49
    // Otherwise, check to see if there is exactly one bit input to the ctlz.
5972
49
    
if (49
UnknownBits.isPowerOf2()49
) {
5973
2
      // Okay, we know that only that the single bit specified by UnknownBits
5974
2
      // could be set on input to the CTLZ node. If this bit is set, the SRL
5975
2
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5976
2
      // to an SRL/XOR pair, which is likely to simplify more.
5977
2
      unsigned ShAmt = UnknownBits.countTrailingZeros();
5978
2
      SDValue Op = N0.getOperand(0);
5979
2
5980
2
      if (
ShAmt2
) {
5981
2
        SDLoc DL(N0);
5982
2
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5983
2
                  DAG.getConstant(ShAmt, DL,
5984
2
                                  getShiftAmountTy(Op.getValueType())));
5985
2
        AddToWorklist(Op.getNode());
5986
2
      }
5987
2
5988
2
      SDLoc DL(N);
5989
2
      return DAG.getNode(ISD::XOR, DL, VT,
5990
2
                         Op, DAG.getConstant(1, DL, VT));
5991
2
    }
5992
288k
  }
5993
288k
5994
288k
  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5995
288k
  
if (288k
N1.getOpcode() == ISD::TRUNCATE &&
5996
288k
      
N1.getOperand(0).getOpcode() == ISD::AND2.22k
) {
5997
69
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5998
56
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5999
288k
  }
6000
288k
6001
288k
  // fold operands of srl based on knowledge that the low bits are not
6002
288k
  // demanded.
6003
288k
  
if (288k
N1C && 288k
SimplifyDemandedBits(SDValue(N, 0))265k
)
6004
8.82k
    return SDValue(N, 0);
6005
279k
6006
279k
  
if (279k
N1C && 279k
!N1C->isOpaque()256k
)
6007
256k
    
if (SDValue 256k
NewSRL256k
= visitShiftByConstant(N, N1C))
6008
634
      return NewSRL;
6009
279k
6010
279k
  // Attempt to convert a srl of a load into a narrower zero-extending load.
6011
279k
  
if (SDValue 279k
NarrowLoad279k
= ReduceLoadWidth(N))
6012
143
    return NarrowLoad;
6013
278k
6014
278k
  // Here is a common situation. We want to optimize:
6015
278k
  //
6016
278k
  //   %a = ...
6017
278k
  //   %b = and i32 %a, 2
6018
278k
  //   %c = srl i32 %b, 1
6019
278k
  //   brcond i32 %c ...
6020
278k
  //
6021
278k
  // into
6022
278k
  //
6023
278k
  //   %a = ...
6024
278k
  //   %b = and %a, 2
6025
278k
  //   %c = setcc eq %b, 0
6026
278k
  //   brcond %c ...
6027
278k
  //
6028
278k
  // However when after the source operand of SRL is optimized into AND, the SRL
6029
278k
  // itself may not be optimized further. Look for it and add the BRCOND into
6030
278k
  // the worklist.
6031
278k
  
if (278k
N->hasOneUse()278k
) {
6032
248k
    SDNode *Use = *N->use_begin();
6033
248k
    if (Use->getOpcode() == ISD::BRCOND)
6034
336
      AddToWorklist(Use);
6035
248k
    else 
if (248k
Use->getOpcode() == ISD::TRUNCATE && 248k
Use->hasOneUse()94.0k
) {
6036
80.9k
      // Also look pass the truncate.
6037
80.9k
      Use = *Use->use_begin();
6038
80.9k
      if (Use->getOpcode() == ISD::BRCOND)
6039
57
        AddToWorklist(Use);
6040
248k
    }
6041
248k
  }
6042
303k
6043
303k
  return SDValue();
6044
303k
}
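
A standalone sketch of the arithmetic identity behind the (srl (srl x, c1), c2) fold above, written with plain C++ unsigned shifts rather than any LLVM API; the helper name srlSrl is ours, purely for illustration.

#include <cassert>
#include <cstdint>

// (x >>u c1) >>u c2 equals x >>u (c1 + c2) while c1 + c2 stays below the bit
// width, and becomes 0 once the combined shift amount is out of range.
uint32_t srlSrl(uint32_t x, unsigned c1, unsigned c2) {
  return (x >> c1) >> c2;   // assumes c1 and c2 are each < 32
}

int main() {
  uint32_t x = 0xDEADBEEF;
  assert(srlSrl(x, 3, 5) == x >> 8);    // in range: one shift by c1 + c2
  assert(srlSrl(x, 20, 15) == 0);       // c1 + c2 >= 32: result is 0
  return 0;
}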

SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
    return N0;
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
    return N0;
  return SDValue();
}

SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
  return SDValue();
}
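
A small standalone check, independent of the LLVM code, that byte swapping and bit reversal are involutions, which is what the (bswap (bswap x)) -> x and (bitreverse (bitreverse x)) -> x folds above rely on; bswap32 and bitreverse32 are illustrative helpers, not LLVM functions.

#include <cassert>
#include <cstdint>

// Applying either operation twice returns the original value.
uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
         ((x << 8) & 0x00FF0000u) | (x << 24);
}

uint32_t bitreverse32(uint32_t x) {
  uint32_t r = 0;
  for (unsigned i = 0; i != 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

int main() {
  uint32_t x = 0x12345678;
  assert(bswap32(x) == 0x78563412);
  assert(bswap32(bswap32(x)) == x);
  assert(bitreverse32(bitreverse32(x)) == x);
  return 0;
}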

SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}

/// \brief Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}
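
A standalone illustration, using the C library fmin/fmax rather than LLVM, of why the FMINNUM/FMAXNUM rewrite is gated on no-NaN assumptions: the select form and fmin/fmax agree on non-NaN inputs but can disagree once a NaN appears.

#include <cassert>
#include <cmath>

int main() {
  double a = 1.5, b = -2.25;
  assert((a < b ? a : b) == std::fmin(a, b));   // both yield b
  assert((a > b ? a : b) == std::fmax(a, b));   // both yield a

  // With a NaN operand the two forms differ, which is why the rewrite above
  // requires that neither operand can be a NaN.
  double n = std::nan("");
  assert(std::fmin(a, n) == a);          // fmin returns the non-NaN operand
  assert(std::isnan(a < n ? a : n));     // the comparison is false, so the
                                         // select form yields the NaN
  return 0;
}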

SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(false, true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(false, false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
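
A scalar model, in plain C++, of the select-of-constants rewrites above; the loop and variable names are illustrative only.

#include <cassert>
#include <cstdint>

// When the two constants differ by one, the select collapses to an extend
// plus an add, and (select c, 0, 1) is just (xor c, 1) for a 0/1 boolean.
int main() {
  for (int cond = 0; cond <= 1; ++cond) {
    const int32_t C1 = 42, C2 = C1 - 1;
    int32_t selected = cond ? C1 : C2;
    int32_t rewritten = static_cast<int32_t>(cond) + C2;  // add (zext cond), C1-1
    assert(selected == rewritten);

    int32_t zeroOne = cond ? 0 : 1;
    assert(zeroOne == (cond ^ 1));                        // xor cond, 1
  }
  return 0;
}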

SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
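
An exhaustive truth-table check, in plain C++, of the two i1 select equivalences that the normalization code above relies on.

#include <cassert>

// select(C0 & C1, x, y) == select(C0, select(C1, x, y), y)
// select(C0 | C1, x, y) == select(C0, x, select(C1, x, y))
int main() {
  const int x = 10, y = 20;
  for (int c0 = 0; c0 <= 1; ++c0)
    for (int c1 = 0; c1 <= 1; ++c1) {
      int andForm = (c0 & c1) ? x : y;
      int andSeq  = c0 ? (c1 ? x : y) : y;
      assert(andForm == andSeq);

      int orForm = (c0 | c1) ? x : y;
      int orSeq  = c0 ? x : (c1 ? x : y);
      assert(orForm == orSeq);
    }
  return 0;
}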

static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
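
A standalone sketch of the idea behind ConvertSelectToConcatVector, using std::array in place of DAG vector nodes: when each half of the constant mask is uniform, the elementwise select reduces to concatenating the chosen halves.

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> lhs = {1, 2, 3, 4};    // concat of {1,2} and {3,4}
  std::array<int, 4> rhs = {5, 6, 7, 8};    // concat of {5,6} and {7,8}
  std::array<int, 4> mask = {1, 1, 0, 0};   // bottom half all-true, top half all-false

  std::array<int, 4> selected = {};
  for (int i = 0; i != 4; ++i)
    selected[i] = mask[i] ? lhs[i] : rhs[i];

  // Equivalent concat: bottom half of LHS followed by top half of RHS.
  std::array<int, 4> concat = {lhs[0], lhs[1], rhs[2], rhs[3]};
  assert(selected == concat);
  return 0;
}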

SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}

SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Data  = MST->getValue();
  EVT VT = Data.getValueType();
  SDLoc DL(N);

  // If the MSTORE data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Chain = MST->getChain();
    SDValue Ptr   = MST->getBasePtr();

    EVT MemoryVT = MST->getMemoryVT();
    unsigned Alignment = MST->getOriginalAlignment();

    // if Alignment is equal to the vector size,
    // take the half of it for the second part
    unsigned SecondHalfAlignment =
      (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    SDValue DataLo, DataHi;
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                           Alignment, MST->getAAInfo(), MST->getRanges());

    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MST->isCompressingStore());

    MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
                           SecondHalfAlignment, MST->getAAInfo(),
                           MST->getRanges());

    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
  }
  return SDValue();
}
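
A minimal standalone model of the SecondHalfAlignment computation used when splitting the masked store above; the function name and the 32-byte example are ours, for illustration only.

#include <cassert>

// If the original alignment covers the whole vector, the high half begins at
// half that offset and therefore only inherits half the alignment; a weaker
// original alignment carries over unchanged.
unsigned secondHalfAlignment(unsigned alignment, unsigned vectorBytes) {
  return (alignment == vectorBytes) ? alignment / 2 : alignment;
}

int main() {
  // A 32-byte vector stored with 32-byte alignment: the high 16-byte half
  // lands at offset 16, which is only 16-byte aligned.
  assert(secondHalfAlignment(32, 32) == 16);
  // With 8-byte alignment, both halves keep 8-byte alignment.
  assert(secondHalfAlignment(8, 32) == 8);
  return 0;
}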

SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue Src0 = MGT->getValue();
  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                            MMO);

  SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                            MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}

SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // If the MLOAD result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Src0 = MLD->getSrc0();
    SDValue Src0Lo, Src0Hi;
    std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

    SDValue Chain = MLD->getChain();
    SDValue Ptr   = MLD->getBasePtr();
    EVT MemoryVT = MLD->getMemoryVT();
    unsigned Alignment = MLD->getOriginalAlignment();

    // if Alignment is equal to the vector size,
    // take the half of it for the second part
    unsigned SecondHalfAlignment =
      (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
         Alignment/2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MLD->getPointerInfo(),
                         MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                         Alignment, MLD->getAAInfo(), MLD->getRanges());

    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MLD->isExpandingLoad());

    MMO = DAG.getMachineFunction().
    getMachineMemOperand(MLD->getPointerInfo(),
                         MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    // Build a factor node to remember that this load is independent of the
    // other one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                        Hi.getValue(1));

    // Legalized the chain result - switch anything that used the old chain to
    // use the new one.
    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);

    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

    SDValue RetOps[] = { LoadRes, Chain };
    return DAG.getMergeValues(RetOps, DL);
  }
  return SDValue();
}

/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}
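
An elementwise scalar model, in plain C++, of the "vselect Cond, C+1, C --> add (zext Cond), C" rewrite above; the array names are illustrative only.

#include <cassert>
#include <cstdint>

// When every true-side constant is exactly one more than the matching
// false-side constant, the whole vector select becomes add(zext(cond), C).
int main() {
  const int N = 4;
  int32_t trueC[N]  = {5, 11, -3, 100};
  int32_t falseC[N] = {4, 10, -4, 99};   // each exactly one less
  bool cond[N]      = {true, false, true, false};

  for (int i = 0; i != N; ++i) {
    int32_t selected  = cond[i] ? trueC[i] : falseC[i];
    int32_t rewritten = static_cast<int32_t>(cond[i]) + falseC[i];
    assert(selected == rewritten);
  }
  return 0;
}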

SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
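
A standalone check of the branchless absolute-value pattern that the abs canonicalization above expands to when ISD::ABS is not legal; absViaShift is an illustrative helper, and the sign mask is computed portably instead of with an actual arithmetic shift.

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Y = sra(X, bits-1) is all-ones for negative X and zero otherwise, so
// (X + Y) ^ Y yields |X|. Arithmetic is done in unsigned to avoid signed
// overflow concerns.
uint32_t absViaShift(int32_t x) {
  uint32_t ux = static_cast<uint32_t>(x);
  uint32_t y = (x < 0) ? 0xFFFFFFFFu : 0u;   // models sra(x, 31)
  return (ux + y) ^ y;
}

int main() {
  int32_t vals[] = {0, 1, -1, 12345, -12345, INT32_MAX, INT32_MIN + 1};
  for (int32_t v : vals)
    assert(absViaShift(v) == static_cast<uint32_t>(std::abs(v)));
  return 0;
}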

SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with the DAG creation; no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

SDValue DAGCombiner::visitSETCC(SDNode *N) {
  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
                       SDLoc(N));
}

SDValue DAGCombiner::visitSETCCE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
  if (Carry.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
  if (isNullConstant(Carry))
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
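
A scalar illustration, independent of the DAG code, of the sign- versus zero-extension of a constant performed above; plain C++ casts stand in for the APInt sext/zext calls.

#include <cassert>
#include <cstdint>

// Sign-extension replicates the source sign bit into the new high bits,
// while zero-extension fills them with zero.
int main() {
  int8_t narrow = -3;                       // 0xFD as an 8-bit pattern
  int32_t sext  = static_cast<int32_t>(narrow);
  uint32_t zext = static_cast<uint32_t>(static_cast<uint8_t>(narrow));

  assert(sext == -3);                       // bit pattern 0xFFFFFFFD
  assert(zext == 0xFDu);                    // bit pattern 0x000000FD
  return 0;
}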

// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extensions are possible and the above
// mentioned transformation is profitable.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return ExtendNodes.size();
  }
  return true;
}

void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue Trunc, SDValue ExtLoad,
                                  const SDLoc &DL, ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
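
The split arithmetic above is easier to follow on concrete numbers. A minimal standalone sketch (hypothetical values, plain integers instead of SelectionDAG types) of how NumSplits, Stride, and the per-piece byte offsets come out for the v8i16 -> v8i32 example in the comment:

#include <cassert>
#include <cstdio>

int main() {
  // Hypothetical case matching the comment: (v8i32 (sext (v8i16 (load x))))
  // split for a target whose widest legal result type is v4i32.
  const unsigned DstElts = 8, SplitDstElts = 4; // v8i32 -> two v4i32 pieces
  const unsigned SplitSrcStoreBytes = 8;        // a v4i16 piece occupies 8 bytes

  const unsigned NumSplits = DstElts / SplitDstElts; // 2 pieces
  const unsigned Stride = SplitSrcStoreBytes;        // 8-byte step between loads

  for (unsigned Idx = 0; Idx < NumSplits; ++Idx) {
    unsigned Offset = Idx * Stride; // byte offset of this piece: 0, then 8
    std::printf("piece %u: sextload from x + %u\n", Idx, Offset);
  }
  assert(NumSplits * Stride == 16 && "two 8-byte halves cover the v8i16 load");
  return 0;
}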

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
  unsigned CastOpcode = Cast->getOpcode();
  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
          CastOpcode == ISD::FP_ROUND) &&
         "Unexpected opcode for vector select narrowing/widening");

  // We only do this transform before legal ops because the pattern may be
  // obfuscated by target-specific operations after legalization. Do not create
  // an illegal select op, however, because that may be difficult to lower.
  EVT VT = Cast->getValueType(0);
  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  SDValue VSel = Cast->getOperand(0);
  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
      VSel.getOperand(0).getOpcode() != ISD::SETCC)
    return SDValue();

  // Does the setcc have the same vector size as the casted select?
  SDValue SetCC = VSel.getOperand(0);
  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
  SDValue A = VSel.getOperand(1);
  SDValue B = VSel.getOperand(2);
  SDValue CastA, CastB;
  SDLoc DL(Cast);
  if (CastOpcode == ISD::FP_ROUND) {
    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
  } else {
    CastA = DAG.getNode(CastOpcode, DL, VT, A);
    CastB = DAG.getNode(CastOpcode, DL, VT, B);
  }
  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0);
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc)
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        else
          CombineTo(LN0, Trunc, ExtLoad.getValue(1));
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
                                           : TLI.getConstTrueVal(DAG, VT, DL);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
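
The trunc/sext elimination above hinges on ComputeNumSignBits. A minimal standalone sketch (not DAGCombiner code; hypothetical scalar values) of the NumSignBits > DestBits - MidBits test for the Op:i32, Mid:i8, Dest:i32 case:

#include <cstdint>
#include <cstdio>

// Count leading bits equal to the sign bit, including the sign bit itself.
static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  uint32_t Sign = U >> 31;
  unsigned Bits = 1;
  while (Bits < 32 && ((U >> (31 - Bits)) & 1) == Sign)
    ++Bits;
  return Bits;
}

int main() {
  const unsigned DestBits = 32, MidBits = 8;
  int32_t A = -5;   // 29 sign bits: survives an i8 truncate/sext round trip
  int32_t B = 1000; // 22 sign bits: does not fit in i8, fold must be blocked
  std::printf("A: %u sign bits -> fold %s\n", numSignBits(A),
              numSignBits(A) > DestBits - MidBits ? "ok" : "blocked");
  std::printf("B: %u sign bits -> fold %s\n", numSignBits(B),
              numSignBits(B) > DestBits - MidBits ? "ok" : "blocked");
  return 0;
}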

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, Known);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullConstant(Op0))
    Op = Op1;
  else if (isNullConstant(Op1))
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, Known);

  if (!(Known.Zero | 1).isAllOnesValue())
    return false;

  return true;
}
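
The final check treats a (setcc x, 0, ne) whose operand has only its low bit possibly set as if it were a truncate to i1. A minimal sketch of that KnownZero test with plain masks (hypothetical values, not the KnownBits class):

#include <cstdint>
#include <cstdio>

int main() {
  // A setcc-against-zero result equals the low bit of x exactly when every bit
  // of x other than bit 0 is known zero, i.e. KnownZero | 1 is all ones.
  uint32_t KnownZeroA = 0xFFFFFFFEu; // hypothetical: only bit 0 may be set
  uint32_t KnownZeroB = 0xFFFFFF00u; // hypothetical: whole low byte unknown
  auto actsLikeTruncToI1 = [](uint32_t KnownZero) {
    return (KnownZero | 1u) == 0xFFFFFFFFu;
  };
  std::printf("A: %s\n", actsLikeTruncToI1(KnownZeroA) ? "yes" : "no"); // yes
  std::printf("B: %s\n", actsLikeTruncToI1(KnownZeroB) ? "yes" : "no"); // no
  return 0;
}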

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT)) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          auto NarrowLoad = false;
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT, LoadedVT;
          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
                               NarrowLoad))
            DoXform = false;
        }
        if (DoXform)
          DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
                                            ISD::ZERO_EXTEND, SetCCs, TLI);
      }
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc)
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        else
          CombineTo(LN0, Trunc, ExtLoad.getValue(1));
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingElementType = EVT::getIntegerVT(
          *DAG.getContext(), N00VT.getScalarSizeInBits());
      EVT MatchingVectorType = EVT::getVectorVT(
          *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
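
Several of the zext folds above rest on the identity that zero-extending a truncation is the same as masking the original value. A minimal standalone sketch (plain integers, hypothetical value) for the i64 -> i32 -> i64 case:

#include <cstdint>
#include <cstdio>

int main() {
  // zext(i64 <- i32) of trunc(i32 <- i64) keeps exactly the low 32 bits,
  // which matches AND-ing the original i64 value with 0xFFFFFFFF.
  uint64_t X = 0x1122334455667788ULL; // hypothetical input
  uint64_t ViaTrunc = static_cast<uint64_t>(static_cast<uint32_t>(X));
  uint64_t ViaMask  = X & 0xFFFFFFFFULL;
  std::printf("equal: %s\n", ViaTrunc == ViaMask ? "yes" : "no"); // yes
  return 0;
}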

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}

// TODO: These transforms should work with AssertSext too.
// Change the function name, comments, opcode references, and caller.
SDValue DAGCombiner::visitAssertZext(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
  if (N0.getOpcode() == ISD::AssertZext &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertZext) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits from a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(ISD::AssertZext, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  return SDValue();
}
8016
8017
/// If the result of a wider load is shifted to right of N  bits and then
8018
/// truncated to a narrower type and where N is a multiple of number of bits of
8019
/// the narrower type, transform it to a narrower load from address + N / num of
8020
/// bits of new type. If the result is to be extended, also fold the extension
8021
/// to form a extending load.
8022
1.21M
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
8023
1.21M
  unsigned Opc = N->getOpcode();
8024
1.21M
8025
1.21M
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
8026
1.21M
  SDValue N0 = N->getOperand(0);
8027
1.21M
  EVT VT = N->getValueType(0);
8028
1.21M
  EVT ExtVT = VT;
8029
1.21M
8030
1.21M
  // This transformation isn't valid for vector loads.
8031
1.21M
  if (VT.isVector())
8032
49.1k
    return SDValue();
8033
1.16M
8034
1.16M
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
8035
1.16M
  // extended to VT.
8036
1.16M
  
if (1.16M
Opc == ISD::SIGN_EXTEND_INREG1.16M
) {
8037
84.4k
    ExtType = ISD::SEXTLOAD;
8038
84.4k
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8039
1.16M
  } else 
if (1.07M
Opc == ISD::SRL1.07M
) {
8040
273k
    // Another special-case: SRL is basically zero-extending a narrower value.
8041
273k
    ExtType = ISD::ZEXTLOAD;
8042
273k
    N0 = SDValue(N, 0);
8043
273k
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8044
273k
    if (
!N01273k
)
return SDValue()20.9k
;
8045
252k
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
8046
252k
                              VT.getSizeInBits() - N01->getZExtValue());
8047
252k
  }
8048
1.14M
  
if (1.14M
LegalOperations && 1.14M
!TLI.isLoadExtLegal(ExtType, VT, ExtVT)458k
)
8049
98.0k
    return SDValue();
8050
1.04M
8051
1.04M
  unsigned EVTBits = ExtVT.getSizeInBits();
8052
1.04M
8053
1.04M
  // Do not generate loads of non-round integer types since these can
8054
1.04M
  // be expensive (and would be wrong if the type is not byte sized).
8055
1.04M
  if (!ExtVT.isRound())
8056
159k
    return SDValue();
8057
884k
8058
884k
  unsigned ShAmt = 0;
8059
884k
  if (
N0.getOpcode() == ISD::SRL && 884k
N0.hasOneUse()181k
) {
8060
170k
    if (ConstantSDNode *
N01170k
= dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8061
169k
      ShAmt = N01->getZExtValue();
8062
169k
      // Is the shift amount a multiple of size of VT?
8063
169k
      if (
(ShAmt & (EVTBits-1)) == 0169k
) {
8064
145k
        N0 = N0.getOperand(0);
8065
145k
        // Is the load width a multiple of size of VT?
8066
145k
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
8067
21
          return SDValue();
8068
169k
      }
8069
169k
8070
169k
      // At this point, we must have a load or else we can't do the transform.
8071
169k
      
if (169k
!isa<LoadSDNode>(N0)169k
)
return SDValue()86.4k
;
8072
83.4k
8073
83.4k
      // Because a SRL must be assumed to *need* to zero-extend the high bits
8074
83.4k
      // (as opposed to anyext the high bits), we can't combine the zextload
8075
83.4k
      // lowering of SRL and an sextload.
8076
83.4k
      
if (83.4k
cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD83.4k
)
8077
35
        return SDValue();
8078
83.4k
8079
83.4k
      // If the shift amount is larger than the input type then we're not
8080
83.4k
      // accessing any of the loaded bytes.  If the load was a zextload/extload
8081
83.4k
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
8082
83.4k
      
if (83.4k
ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()83.4k
)
8083
2
        return SDValue();
8084
798k
    }
8085
170k
  }
8086
798k
8087
798k
  // If the load is shifted left (and the result isn't shifted back right),
8088
798k
  // we can fold the truncate through the shift.
8089
798k
  unsigned ShLeftAmt = 0;
8090
798k
  if (
ShAmt == 0 && 798k
N0.getOpcode() == ISD::SHL714k
&&
N0.hasOneUse()1.28k
&&
8091
798k
      
ExtVT == VT582
&&
TLI.isNarrowingProfitable(N0.getValueType(), VT)355
) {
8092
38
    if (ConstantSDNode *
N0138
= dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8093
9
      ShLeftAmt = N01->getZExtValue();
8094
9
      N0 = N0.getOperand(0);
8095
9
    }
8096
38
  }
8097
798k
8098
798k
  // If we haven't found a load, we can't narrow it.  Don't transform one with
8099
798k
  // multiple uses, this would require adding a new load.
8100
798k
  if (
!isa<LoadSDNode>(N0) || 798k
!N0.hasOneUse()249k
)
8101
791k
    return SDValue();
8102
7.27k
8103
7.27k
  // Don't change the width of a volatile load.
8104
7.27k
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8105
7.27k
  if (LN0->isVolatile())
8106
140
    return SDValue();
8107
7.13k
8108
7.13k
  // Verify that we are actually reducing a load width here.
8109
7.13k
  
if (7.13k
LN0->getMemoryVT().getSizeInBits() < EVTBits7.13k
)
8110
223
    return SDValue();
8111
6.90k
8112
6.90k
  // For the transform to be legal, the load must produce only two values
8113
6.90k
  // (the value loaded and the chain).  Don't transform a pre-increment
8114
6.90k
  // load, for example, which produces an extra value.  Otherwise the
8115
6.90k
  // transformation is not equivalent, and the downstream logic to replace
8116
6.90k
  // uses gets things wrong.
8117
6.90k
  
if (6.90k
LN0->getNumValues() > 26.90k
)
8118
2
    return SDValue();
8119
6.90k
8120
6.90k
  // If the load that we're shrinking is an extload and we're not just
8121
6.90k
  // discarding the extension we can't simply shrink the load. Bail.
8122
6.90k
  // TODO: It would be possible to merge the extensions in some cases.
8123
6.90k
  
if (6.90k
LN0->getExtensionType() != ISD::NON_EXTLOAD &&
8124
1.62k
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
8125
2
    return SDValue();
8126
6.90k
8127
6.90k
  
if (6.90k
!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)6.90k
)
8128
877
    return SDValue();
8129
6.02k
8130
6.02k
  EVT PtrType = N0.getOperand(1).getValueType();
8131
6.02k
8132
6.02k
  if (
PtrType == MVT::Untyped || 6.02k
PtrType.isExtended()6.02k
)
8133
6.02k
    // It's not possible to generate a constant of extended or untyped type.
8134
0
    return SDValue();
8135
6.02k
8136
6.02k
  // For big endian targets, we need to adjust the offset to the pointer to
8137
6.02k
  // load the correct bytes.
8138
6.02k
  
if (6.02k
DAG.getDataLayout().isBigEndian()6.02k
) {
8139
99
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8140
99
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8141
99
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8142
99
  }
8143
6.02k
8144
6.02k
  uint64_t PtrOff = ShAmt / 8;
8145
6.02k
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8146
6.02k
  SDLoc DL(LN0);
8147
6.02k
  // The original load itself didn't wrap, so an offset within it doesn't.
8148
6.02k
  SDNodeFlags Flags;
8149
6.02k
  Flags.setNoUnsignedWrap(true);
8150
6.02k
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8151
6.02k
                               PtrType, LN0->getBasePtr(),
8152
6.02k
                               DAG.getConstant(PtrOff, DL, PtrType),
8153
6.02k
                               Flags);
8154
6.02k
  AddToWorklist(NewPtr.getNode());
8155
6.02k
8156
6.02k
  SDValue Load;
8157
6.02k
  if (ExtType == ISD::NON_EXTLOAD)
8158
2.90k
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8159
2.90k
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8160
2.90k
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8161
6.02k
  else
8162
3.12k
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8163
3.12k
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8164
3.12k
                          NewAlign, LN0->getMemOperand()->getFlags(),
8165
3.12k
                          LN0->getAAInfo());
8166
6.02k
8167
6.02k
  // Replace the old load's chain with the new load's chain.
8168
6.02k
  WorklistRemover DeadNodes(*this);
8169
6.02k
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8170
6.02k
8171
6.02k
  // Shift the result left, if we've swallowed a left shift.
8172
6.02k
  SDValue Result = Load;
8173
6.02k
  if (
ShLeftAmt != 06.02k
) {
8174
9
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8175
9
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8176
0
      ShImmTy = VT;
8177
9
    // If the shift amount is as large as the result size (but, presumably,
8178
9
    // no larger than the source) then the useful bits of the result are
8179
9
    // zero; we can't simply return the shortened shift, because the result
8180
9
    // of that operation is undefined.
8181
9
    SDLoc DL(N0);
8182
9
    if (ShLeftAmt >= VT.getSizeInBits())
8183
7
      Result = DAG.getConstant(0, DL, VT);
8184
9
    else
8185
2
      Result = DAG.getNode(ISD::SHL, DL, VT,
8186
2
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8187
9
  }
8188
1.21M
8189
1.21M
  // Return the new loaded value.
8190
1.21M
  return Result;
8191
1.21M
}
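As a standalone sketch of the narrowing performed by ReduceLoadWidth (plain C++ rather than SelectionDAG calls, with invented names, a little-endian host assumed, and the usual two's-complement behaviour of narrowing casts; the combiner itself adjusts the offset for big-endian targets as shown above):

#include <cassert>
#include <cstdint>
#include <cstring>

// (sext_in_reg (srl (load x), 16), i8): wide load, shift, then sign-extend i8.
int32_t viaWideLoad(const unsigned char *P) {
  uint32_t Wide;
  std::memcpy(&Wide, P, sizeof Wide);
  return static_cast<int8_t>(Wide >> 16);
}

// The narrowed form: an i8 sign-extending load at offset ShAmt / 8 == 2.
int32_t viaNarrowLoad(const unsigned char *P) {
  int8_t Narrow;
  std::memcpy(&Narrow, P + 2, sizeof Narrow);
  return Narrow;
}

int main() {
  unsigned char Buf[4] = {0x01, 0x02, 0x80, 0x03};
  assert(viaWideLoad(Buf) == viaNarrowLoad(Buf));
  return 0;
}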
8192
8193
99.7k
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8194
99.7k
  SDValue N0 = N->getOperand(0);
8195
99.7k
  SDValue N1 = N->getOperand(1);
8196
99.7k
  EVT VT = N->getValueType(0);
8197
99.7k
  EVT EVT = cast<VTSDNode>(N1)->getVT();
8198
99.7k
  unsigned VTBits = VT.getScalarSizeInBits();
8199
99.7k
  unsigned EVTBits = EVT.getScalarSizeInBits();
8200
99.7k
8201
99.7k
  if (N0.isUndef())
8202
1
    return DAG.getUNDEF(VT);
8203
99.7k
8204
99.7k
  // fold (sext_in_reg c1) -> c1
8205
99.7k
  
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8206
36
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8207
99.7k
8208
99.7k
  // If the input is already sign extended, just drop the extension.
8209
99.7k
  
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8210
2.69k
    return N0;
8211
97.0k
8212
97.0k
  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8213
97.0k
  
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8214
2
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8215
2
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8216
2
                       N0.getOperand(0), N1);
8217
97.0k
8218
97.0k
  // fold (sext_in_reg (sext x)) -> (sext x)
8219
97.0k
  // fold (sext_in_reg (aext x)) -> (sext x)
8220
97.0k
  // if x is small enough.
8221
97.0k
  
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8222
9.04k
    SDValue N00 = N0.getOperand(0);
8223
9.04k
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
8224
440
        
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8225
438
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8226
96.6k
  }
8227
96.6k
8228
96.6k
  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8229
96.6k
  
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8230
96.5k
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8231
96.5k
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8232
96.6k
      
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8233
38
    if (!LegalOperations ||
8234
0
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8235
38
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8236
96.5k
  }
8237
96.5k
8238
96.5k
  // fold (sext_in_reg (zext x)) -> (sext x)
8239
96.5k
  // iff we are extending the source sign bit.
8240
96.5k
  
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8241
186
    SDValue N00 = N0.getOperand(0);
8242
186
    if (N00.getScalarValueSizeInBits() == EVTBits &&
8243
9
        
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8244
9
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8245
96.5k
  }
8246
96.5k
8247
96.5k
  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8248
96.5k
  
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8249
75
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8250
96.4k
8251
96.4k
  // fold operands of sext_in_reg based on knowledge that the top bits are not
8252
96.4k
  // demanded.
8253
96.4k
  
  if (SimplifyDemandedBits(SDValue(N, 0)))
8254
2.58k
    return SDValue(N, 0);
8255
93.9k
8256
93.9k
  // fold (sext_in_reg (load x)) -> (smaller sextload x)
8257
93.9k
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8258
93.9k
  
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
8259
2.98k
    return NarrowLoad;
8260
90.9k
8261
90.9k
  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8262
90.9k
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8263
90.9k
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8264
90.9k
  
  if (N0.getOpcode() == ISD::SRL) {
8265
11.0k
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8266
9.74k
      
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8267
9.74k
        // We can turn this into an SRA iff the input to the SRL is already sign
8268
9.74k
        // extended enough.
8269
9.74k
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8270
9.74k
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8271
1.89k
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8272
1.89k
                             N0.getOperand(0), N0.getOperand(1));
8273
89.0k
      }
8274
11.0k
  }
8275
89.0k
8276
89.0k
  // fold (sext_inreg (extload x)) -> (sextload x)
8277
89.0k
  
  if (ISD::isEXTLoad(N0.getNode()) &&
8278
3.04k
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
8279
3.04k
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8280
2.86k
      
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8281
89.0k
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8282
2.01k
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8283
2.01k
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8284
2.01k
                                     LN0->getChain(),
8285
2.01k
                                     LN0->getBasePtr(), EVT,
8286
2.01k
                                     LN0->getMemOperand());
8287
2.01k
    CombineTo(N, ExtLoad);
8288
2.01k
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8289
2.01k
    AddToWorklist(ExtLoad.getNode());
8290
2.01k
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8291
2.01k
  }
8292
87.0k
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8293
87.0k
  
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8294
589
      N0.hasOneUse() &&
8295
0
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8296
0
      
((!LegalOperations && 0
!cast<LoadSDNode>(N0)->isVolatile()0
) ||
8297
87.0k
       
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT)0
)) {
8298
0
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8299
0
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8300
0
                                     LN0->getChain(),
8301
0
                                     LN0->getBasePtr(), EVT,
8302
0
                                     LN0->getMemOperand());
8303
0
    CombineTo(N, ExtLoad);
8304
0
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8305
0
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8306
0
  }
8307
87.0k
8308
87.0k
  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8309
87.0k
  
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8310
5.84k
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8311
5.84k
                                           N0.getOperand(1), false))
8312
8
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8313
8
                         BSwap, N1);
8314
87.0k
  }
8315
87.0k
8316
87.0k
  return SDValue();
8317
87.0k
}
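A minimal scalar model of the sign_extend_inreg folds above (illustration only; the names are invented, and it assumes the usual two's-complement narrowing and arithmetic right shift of negative int32_t values). The assert checks the (sext_in_reg (srl X, 24), i8) -> (sra X, 24) rewrite:

#include <cassert>
#include <cstdint>

uint32_t sextInReg8(uint32_t X) {
  // Copy bit 7 into bits 8..31, as SIGN_EXTEND_INREG with an i8 inner type does.
  return static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(X & 0xff)));
}

int main() {
  uint32_t X = 0x80000000u;
  uint32_t ViaSrl = sextInReg8(X >> 24);                                    // srl, then sext_in_reg
  uint32_t ViaSra = static_cast<uint32_t>(static_cast<int32_t>(X) >> 24);   // a single sra
  assert(ViaSrl == ViaSra);
  return 0;
}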
8318
8319
3.09k
SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8320
3.09k
  SDValue N0 = N->getOperand(0);
8321
3.09k
  EVT VT = N->getValueType(0);
8322
3.09k
8323
3.09k
  if (N0.isUndef())
8324
0
    return DAG.getUNDEF(VT);
8325
3.09k
8326
3.09k
  
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8327
3.09k
                                              LegalOperations))
8328
28
    return SDValue(Res, 0);
8329
3.06k
8330
3.06k
  return SDValue();
8331
3.06k
}
8332
8333
3.91k
SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8334
3.91k
  SDValue N0 = N->getOperand(0);
8335
3.91k
  EVT VT = N->getValueType(0);
8336
3.91k
8337
3.91k
  if (N0.isUndef())
8338
0
    return DAG.getUNDEF(VT);
8339
3.91k
8340
3.91k
  
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8341
3.91k
                                              LegalOperations))
8342
31
    return SDValue(Res, 0);
8343
3.88k
8344
3.88k
  return SDValue();
8345
3.88k
}
8346
8347
849k
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8348
849k
  SDValue N0 = N->getOperand(0);
8349
849k
  EVT VT = N->getValueType(0);
8350
849k
  bool isLE = DAG.getDataLayout().isLittleEndian();
8351
849k
8352
849k
  // noop truncate
8353
849k
  if (N0.getValueType() == N->getValueType(0))
8354
0
    return N0;
8355
849k
  // fold (truncate c1) -> c1
8356
849k
  
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8357
2.76k
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8358
846k
  // fold (truncate (truncate x)) -> (truncate x)
8359
846k
  
  if (N0.getOpcode() == ISD::TRUNCATE)
8360
1.73k
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8361
845k
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8362
845k
  
if (845k
N0.getOpcode() == ISD::ZERO_EXTEND ||
8363
844k
      N0.getOpcode() == ISD::SIGN_EXTEND ||
8364
845k
      
N0.getOpcode() == ISD::ANY_EXTEND843k
) {
8365
11.5k
    // if the source is smaller than the dest, we still need an extend.
8366
11.5k
    if (N0.getOperand(0).getValueType().bitsLT(VT))
8367
91
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8368
11.4k
    // if the source is larger than the dest, then we just need the truncate.
8369
11.4k
    
if (11.4k
N0.getOperand(0).getValueType().bitsGT(VT)11.4k
)
8370
707
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8371
10.7k
    // if the source and dest are the same type, we can drop both the extend
8372
10.7k
    // and the truncate.
8373
10.7k
    return N0.getOperand(0);
8374
10.7k
  }
8375
833k
8376
833k
  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8377
833k
  
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8378
1.42k
    return SDValue();
8379
832k
8380
832k
  // Fold extract-and-trunc into a narrow extract. For example:
8381
832k
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8382
832k
  //   i32 y = TRUNCATE(i64 x)
8383
832k
  //        -- becomes --
8384
832k
  //   v16i8 b = BITCAST (v2i64 val)
8385
832k
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8386
832k
  //
8387
832k
  // Note: We only run this optimization after type legalization (which often
8388
832k
  // creates this pattern) and before operation legalization after which
8389
832k
  // we need to be more careful about the vector instructions that we generate.
8390
832k
  
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8391
832k
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8392
8.17k
    EVT VecTy = N0.getOperand(0).getValueType();
8393
8.17k
    EVT ExTy = N0.getValueType();
8394
8.17k
    EVT TrTy = N->getValueType(0);
8395
8.17k
8396
8.17k
    unsigned NumElem = VecTy.getVectorNumElements();
8397
8.17k
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8398
8.17k
8399
8.17k
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8400
8.17k
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8401
8.17k
8402
8.17k
    SDValue EltNo = N0->getOperand(1);
8403
8.17k
    if (
isa<ConstantSDNode>(EltNo) && 8.17k
isTypeLegal(NVT)8.16k
) {
8404
7.95k
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8405
7.95k
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8406
7.95k
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8407
7.95k
8408
7.95k
      SDLoc DL(N);
8409
7.95k
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8410
7.95k
                         DAG.getBitcast(NVT, N0.getOperand(0)),
8411
7.95k
                         DAG.getConstant(Index, DL, IndexTy));
8412
7.95k
    }
8413
824k
  }
8414
824k
8415
824k
  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8416
824k
  
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8417
883
    EVT SrcVT = N0.getValueType();
8418
883
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8419
883
        TLI.isTruncateFree(SrcVT, VT)) {
8420
827
      SDLoc SL(N0);
8421
827
      SDValue Cond = N0.getOperand(0);
8422
827
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8423
827
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8424
827
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8425
827
    }
8426
823k
  }
8427
823k
8428
823k
  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8429
823k
  
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8430
1.11k
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8431
823k
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8432
1.03k
    SDValue Amt = N0.getOperand(1);
8433
1.03k
    KnownBits Known;
8434
1.03k
    DAG.computeKnownBits(Amt, Known);
8435
1.03k
    unsigned Size = VT.getScalarSizeInBits();
8436
1.03k
    if (
Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)1.03k
) {
8437
666
      SDLoc SL(N);
8438
666
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8439
666
8440
666
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8441
666
      if (
AmtVT != Amt.getValueType()666
) {
8442
8
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
8443
8
        AddToWorklist(Amt.getNode());
8444
8
      }
8445
666
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
8446
666
    }
8447
822k
  }
8448
822k
8449
822k
  // Fold a series of buildvector, bitcast, and truncate if possible.
8450
822k
  // For example fold
8451
822k
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8452
822k
  //   (2xi32 (buildvector x, y)).
8453
822k
  
if (822k
Level == AfterLegalizeVectorOps && 822k
VT.isVector()18.7k
&&
8454
822k
      
N0.getOpcode() == ISD::BITCAST7.41k
&&
N0.hasOneUse()555
&&
8455
555
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8456
822k
      
N0.getOperand(0).hasOneUse()521
) {
8457
521
    SDValue BuildVect = N0.getOperand(0);
8458
521
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8459
521
    EVT TruncVecEltTy = VT.getVectorElementType();
8460
521
8461
521
    // Check that the element types match.
8462
521
    if (
BuildVectEltTy == TruncVecEltTy521
) {
8463
1
      // Now we only need to compute the offset of the truncated elements.
8464
1
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8465
1
      unsigned TruncVecNumElts = VT.getVectorNumElements();
8466
1
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8467
1
8468
1
      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8469
1
             "Invalid number of elements");
8470
1
8471
1
      SmallVector<SDValue, 8> Opnds;
8472
3
      for (unsigned i = 0, e = BuildVecNumElts; 
i != e3
;
i += TruncEltOffset2
)
8473
2
        Opnds.push_back(BuildVect.getOperand(i));
8474
1
8475
1
      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8476
1
    }
8477
822k
  }
8478
822k
8479
822k
  // See if we can simplify the input to this truncate through knowledge that
8480
822k
  // only the low bits are being used.
8481
822k
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
8482
822k
  // Currently we only perform this optimization on scalars because vectors
8483
822k
  // may have different active low bits.
8484
822k
  
if (822k
!VT.isVector()822k
) {
8485
789k
    APInt Mask =
8486
789k
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
8487
789k
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
8488
1.58k
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8489
821k
  }
8490
821k
8491
821k
  // fold (truncate (load x)) -> (smaller load x)
8492
821k
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8493
821k
  
if (821k
!LegalTypes || 821k
TLI.isTypeDesirableForOp(N0.getOpcode(), VT)376k
) {
8494
813k
    if (SDValue Reduced = ReduceLoadWidth(N))
8495
2.81k
      return Reduced;
8496
811k
8497
811k
    // Handle the case where the load remains an extending load even
8498
811k
    // after truncation.
8499
811k
    
if (811k
N0.hasOneUse() && 811k
ISD::isUNINDEXEDLoad(N0.getNode())509k
) {
8500
2.23k
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8501
2.23k
      if (!LN0->isVolatile() &&
8502
2.23k
          
LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()2.12k
) {
8503
221
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8504
221
                                         VT, LN0->getChain(), LN0->getBasePtr(),
8505
221
                                         LN0->getMemoryVT(),
8506
221
                                         LN0->getMemOperand());
8507
221
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8508
221
        return NewLoad;
8509
221
      }
8510
818k
    }
8511
813k
  }
8512
818k
8513
818k
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8514
818k
  // where ... are all 'undef'.
8515
818k
  
if (818k
N0.getOpcode() == ISD::CONCAT_VECTORS && 818k
!LegalTypes830
) {
8516
235
    SmallVector<EVT, 8> VTs;
8517
235
    SDValue V;
8518
235
    unsigned Idx = 0;
8519
235
    unsigned NumDefs = 0;
8520
235
8521
493
    for (unsigned i = 0, e = N0.getNumOperands(); 
i != e493
;
++i258
) {
8522
486
      SDValue X = N0.getOperand(i);
8523
486
      if (
!X.isUndef()486
) {
8524
463
        V = X;
8525
463
        Idx = i;
8526
463
        NumDefs++;
8527
463
      }
8528
486
      // Stop if more than one members are non-undef.
8529
486
      if (NumDefs > 1)
8530
228
        break;
8531
258
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8532
258
                                     VT.getVectorElementType(),
8533
258
                                     X.getValueType().getVectorNumElements()));
8534
258
    }
8535
235
8536
235
    if (NumDefs == 0)
8537
0
      return DAG.getUNDEF(VT);
8538
235
8539
235
    
if (235
NumDefs == 1235
) {
8540
7
      assert(V.getNode() && "The single defined operand is empty!");
8541
7
      SmallVector<SDValue, 8> Opnds;
8542
37
      for (unsigned i = 0, e = VTs.size(); 
i != e37
;
++i30
) {
8543
30
        if (
i != Idx30
) {
8544
23
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
8545
23
          continue;
8546
23
        }
8547
7
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8548
7
        AddToWorklist(NV.getNode());
8549
7
        Opnds.push_back(NV);
8550
7
      }
8551
7
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8552
7
    }
8553
818k
  }
8554
818k
8555
818k
  // Fold truncate of a bitcast of a vector to an extract of the low vector
8556
818k
  // element.
8557
818k
  //
8558
818k
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
8559
818k
  
if (818k
N0.getOpcode() == ISD::BITCAST && 818k
!VT.isVector()12.8k
) {
8560
11.9k
    SDValue VecSrc = N0.getOperand(0);
8561
11.9k
    EVT SrcVT = VecSrc.getValueType();
8562
11.9k
    if (
SrcVT.isVector() && 11.9k
SrcVT.getScalarType() == VT10.4k
&&
8563
9.72k
        (!LegalOperations ||
8564
11.9k
         
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT)8.82k
)) {
8565
9.69k
      SDLoc SL(N);
8566
9.69k
8567
9.69k
      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8568
9.69k
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
8569
9.69k
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8570
9.69k
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
8571
9.69k
    }
8572
808k
  }
8573
808k
8574
808k
  // Simplify the operands using demanded-bits information.
8575
808k
  
if (808k
!VT.isVector() &&
8576
775k
      SimplifyDemandedBits(SDValue(N, 0)))
8577
20.9k
    return SDValue(N, 0);
8578
787k
8579
787k
  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8580
787k
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8581
787k
  // When the adde's carry is not used.
8582
787k
  
if (787k
(N0.getOpcode() == ISD::ADDE || 787k
N0.getOpcode() == ISD::ADDCARRY787k
) &&
8583
787k
      
N0.hasOneUse()15
&&
!N0.getNode()->hasAnyUseOfValue(1)9
&&
8584
787k
      
(!LegalOperations || 9
TLI.isOperationLegal(N0.getOpcode(), VT)0
)) {
8585
9
    SDLoc SL(N);
8586
9
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8587
9
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8588
9
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8589
9
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8590
9
  }
8591
787k
8592
787k
  
if (SDValue 787k
NewVSel787k
= matchVSelectOpSizesWithSetCC(N))
8593
4
    return NewVSel;
8594
787k
8595
787k
  return SDValue();
8596
787k
}
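The trunc (shl x, K) -> shl (trunc x), K fold above relies on truncation commuting with a left shift whose amount is provably below the narrow width (the computeKnownBits guard). A self-contained check of that identity, illustration only with invented values:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcdef0ull;
  for (unsigned K = 0; K < 32; ++K)
    assert(static_cast<uint32_t>(X << K) == (static_cast<uint32_t>(X) << K));
  return 0;
}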
8597
8598
57.9k
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8599
57.9k
  SDValue Elt = N->getOperand(i);
8600
57.9k
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
8601
57.8k
    return Elt.getNode();
8602
98
  return Elt.getOperand(Elt.getResNo()).getNode();
8603
98
}
8604
8605
/// build_pair (load, load) -> load
8606
/// if load locations are consecutive.
8607
28.9k
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8608
28.9k
  assert(N->getOpcode() == ISD::BUILD_PAIR);
8609
28.9k
8610
28.9k
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8611
28.9k
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8612
28.9k
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8613
2.09k
      LD1->getAddressSpace() != LD2->getAddressSpace())
8614
26.9k
    return SDValue();
8615
2.09k
  EVT LD1VT = LD1->getValueType(0);
8616
2.09k
  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8617
2.09k
  if (
ISD::isNON_EXTLoad(LD2) && 2.09k
LD2->hasOneUse()2.09k
&&
8618
2.09k
      
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)2.09k
) {
8619
1.95k
    unsigned Align = LD1->getAlignment();
8620
1.95k
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8621
1.95k
        VT.getTypeForEVT(*DAG.getContext()));
8622
1.95k
8623
1.95k
    if (NewAlign <= Align &&
8624
1.80k
        
(!LegalOperations || 1.80k
TLI.isOperationLegal(ISD::LOAD, VT)0
))
8625
1.80k
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8626
1.80k
                         LD1->getPointerInfo(), Align);
8627
293
  }
8628
293
8629
293
  return SDValue();
8630
293
}
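A sketch of what CombineConsecutiveLoads exploits (illustration only; invented buffer, little-endian host assumed): two consecutive i32 loads that feed a BUILD_PAIR carry the same bits as a single i64 load of the lower address, which is the load the combine emits when alignment and legality allow.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint32_t Lo, Hi;
  uint64_t Wide;
  std::memcpy(&Lo, Buf, 4);        // load at base
  std::memcpy(&Hi, Buf + 4, 4);    // consecutive load at base + 4
  std::memcpy(&Wide, Buf, 8);      // the single wide load
  assert(Wide == ((static_cast<uint64_t>(Hi) << 32) | Lo));  // BUILD_PAIR(Lo, Hi)
  return 0;
}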
8631
8632
10
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8633
10
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8634
10
  // and Lo parts; on big-endian machines it doesn't.
8635
10
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8636
10
}
8637
8638
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8639
365k
                                    const TargetLowering &TLI) {
8640
365k
  // If this is not a bitcast to an FP type or if the target doesn't have
8641
365k
  // IEEE754-compliant FP logic, we're done.
8642
365k
  EVT VT = N->getValueType(0);
8643
365k
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8644
335k
    return SDValue();
8645
30.2k
8646
30.2k
  // TODO: Use splat values for the constant-checking below and remove this
8647
30.2k
  // restriction.
8648
30.2k
  SDValue N0 = N->getOperand(0);
8649
30.2k
  EVT SourceVT = N0.getValueType();
8650
30.2k
  if (SourceVT.isVector())
8651
13.6k
    return SDValue();
8652
16.5k
8653
16.5k
  unsigned FPOpcode;
8654
16.5k
  APInt SignMask;
8655
16.5k
  switch (N0.getOpcode()) {
8656
91
  case ISD::AND:
8657
91
    FPOpcode = ISD::FABS;
8658
91
    SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8659
91
    break;
8660
222
  case ISD::XOR:
8661
222
    FPOpcode = ISD::FNEG;
8662
222
    SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8663
222
    break;
8664
16.5k
  // TODO: ISD::OR --> ISD::FNABS?
8665
16.2k
  default:
8666
16.2k
    return SDValue();
8667
313
  }
8668
313
8669
313
  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8670
313
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8671
313
  SDValue LogicOp0 = N0.getOperand(0);
8672
313
  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8673
313
  if (
LogicOp1 && 313
LogicOp1->getAPIntValue() == SignMask63
&&
8674
52
      LogicOp0.getOpcode() == ISD::BITCAST &&
8675
50
      LogicOp0->getOperand(0).getValueType() == VT)
8676
50
    return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8677
263
8678
263
  return SDValue();
8679
263
}
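The fold above rests on the fact that, for IEEE-754 formats, XOR-ing the sign mask into a float's bit pattern is exactly FNEG. A minimal check of that equivalence (illustration only; the helper name is invented and binary32 is assumed):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static float fnegViaXor(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);
  Bits ^= 0x80000000u;              // APInt::getSignMask(32)
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}

int main() {
  assert(fnegViaXor(1.5f) == -1.5f);
  assert(!std::signbit(fnegViaXor(-0.0f)));   // the sign of zero flips too
  return 0;
}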
8680
8681
394k
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8682
394k
  SDValue N0 = N->getOperand(0);
8683
394k
  EVT VT = N->getValueType(0);
8684
394k
8685
394k
  if (N0.isUndef())
8686
109
    return DAG.getUNDEF(VT);
8687
393k
8688
393k
  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8689
393k
  // Only do this before legalize, since afterward the target may be depending
8690
393k
  // on the bitconvert.
8691
393k
  // First check to see if this is all constant.
8692
393k
  
if (393k
!LegalTypes &&
8693
393k
      
N0.getOpcode() == ISD::BUILD_VECTOR55.9k
&&
N0.getNode()->hasOneUse()2.54k
&&
8694
393k
      
VT.isVector()1.92k
) {
8695
1.23k
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8696
1.23k
8697
1.23k
    EVT DestEltVT = N->getValueType(0).getVectorElementType();
8698
1.23k
    assert(!DestEltVT.isVector() &&
8699
1.23k
           "Element type of vector ValueType must not be vector!");
8700
1.23k
    if (isSimple)
8701
377
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8702
393k
  }
8703
393k
8704
393k
  // If the input is a constant, let getNode fold it.
8705
393k
  
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8706
2.49k
    // If we can't allow illegal operations, we need to check that this is just
8707
2.49k
    // a fp -> int or int -> fp conversion and that the resulting operation will
8708
2.49k
    // be legal.
8709
2.49k
    if (!LegalOperations ||
8710
2.30k
        
(isa<ConstantSDNode>(N0) && 2.30k
VT.isFloatingPoint()2.26k
&&
!VT.isVector()13
&&
8711
2.30k
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8712
2.30k
        
(isa<ConstantFPSDNode>(N0) && 2.30k
VT.isInteger()44
&&
!VT.isVector()44
&&
8713
26
         TLI.isOperationLegal(ISD::Constant, VT)))
8714
220
      return DAG.getBitcast(VT, N0);
8715
393k
  }
8716
393k
8717
393k
  // (conv (conv x, t1), t2) -> (conv x, t2)
8718
393k
  
if (393k
N0.getOpcode() == ISD::BITCAST393k
)
8719
24.0k
    return DAG.getBitcast(VT, N0.getOperand(0));
8720
369k
8721
369k
  // fold (conv (load x)) -> (load (conv*)x)
8722
369k
  // If the resultant load doesn't need a higher alignment than the original!
8723
369k
  
if (369k
ISD::isNormalLoad(N0.getNode()) && 369k
N0.hasOneUse()79.6k
&&
8724
369k
      // Do not change the width of a volatile load.
8725
77.0k
      !cast<LoadSDNode>(N0)->isVolatile() &&
8726
369k
      // Do not remove the cast if the types differ in endian layout.
8727
72.9k
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8728
72.9k
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8729
72.9k
      
(!LegalOperations || 72.9k
TLI.isOperationLegal(ISD::LOAD, VT)65.3k
) &&
8730
369k
      
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)8.73k
) {
8731
3.68k
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8732
3.68k
    unsigned OrigAlign = LN0->getAlignment();
8733
3.68k
8734
3.68k
    bool Fast = false;
8735
3.68k
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8736
3.68k
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
8737
3.68k
        
Fast3.67k
) {
8738
3.65k
      SDValue Load =
8739
3.65k
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8740
3.65k
                      LN0->getPointerInfo(), OrigAlign,
8741
3.65k
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8742
3.65k
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8743
3.65k
      return Load;
8744
3.65k
    }
8745
365k
  }
8746
365k
8747
365k
  
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8748
50
    return V;
8749
365k
8750
365k
  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8751
365k
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8752
365k
  //
8753
365k
  // For ppc_fp128:
8754
365k
  // fold (bitcast (fneg x)) ->
8755
365k
  //     flipbit = signbit
8756
365k
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
8757
365k
  //
8758
365k
  // fold (bitcast (fabs x)) ->
8759
365k
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8760
365k
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
8761
365k
  // This often reduces constant pool loads.
8762
365k
  
if (365k
((N0.getOpcode() == ISD::FNEG && 365k
!TLI.isFNegFree(N0.getValueType())415
) ||
8763
365k
       
(N0.getOpcode() == ISD::FABS && 365k
!TLI.isFAbsFree(N0.getValueType())321
)) &&
8764
365k
      
N0.getNode()->hasOneUse()372
&&
VT.isInteger()222
&&
8765
365k
      
!VT.isVector()197
&&
!N0.getValueType().isVector()102
) {
8766
66
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8767
66
    AddToWorklist(NewConv.getNode());
8768
66
8769
66
    SDLoc DL(N);
8770
66
    if (
N0.getValueType() == MVT::ppcf128 && 66
!LegalTypes10
) {
8771
10
      assert(VT.getSizeInBits() == 128);
8772
10
      SDValue SignBit = DAG.getConstant(
8773
10
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8774
10
      SDValue FlipBit;
8775
10
      if (
N0.getOpcode() == ISD::FNEG10
) {
8776
5
        FlipBit = SignBit;
8777
5
        AddToWorklist(FlipBit.getNode());
8778
10
      } else {
8779
5
        assert(N0.getOpcode() == ISD::FABS);
8780
5
        SDValue Hi =
8781
5
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8782
5
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8783
5
                                              SDLoc(NewConv)));
8784
5
        AddToWorklist(Hi.getNode());
8785
5
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8786
5
        AddToWorklist(FlipBit.getNode());
8787
5
      }
8788
10
      SDValue FlipBits =
8789
10
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8790
10
      AddToWorklist(FlipBits.getNode());
8791
10
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8792
10
    }
8793
56
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8794
56
    if (N0.getOpcode() == ISD::FNEG)
8795
22
      return DAG.getNode(ISD::XOR, DL, VT,
8796
22
                         NewConv, DAG.getConstant(SignBit, DL, VT));
8797
0
    assert(N0.getOpcode() == ISD::FABS);
8798
34
    return DAG.getNode(ISD::AND, DL, VT,
8799
34
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
8800
34
  }
8801
365k
8802
365k
  // fold (bitconvert (fcopysign cst, x)) ->
8803
365k
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8804
365k
  // Note that we don't handle (copysign x, cst) because this can always be
8805
365k
  // folded to an fneg or fabs.
8806
365k
  //
8807
365k
  // For ppc_fp128:
8808
365k
  // fold (bitcast (fcopysign cst, x)) ->
8809
365k
  //     flipbit = (and (extract_element
8810
365k
  //                     (xor (bitcast cst), (bitcast x)), 0),
8811
365k
  //                    signbit)
8812
365k
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8813
365k
  
if (365k
N0.getOpcode() == ISD::FCOPYSIGN && 365k
N0.getNode()->hasOneUse()232
&&
8814
214
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8815
365k
      
VT.isInteger()6
&&
!VT.isVector()6
) {
8816
6
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8817
6
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8818
6
    if (
isTypeLegal(IntXVT)6
) {
8819
6
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8820
6
      AddToWorklist(X.getNode());
8821
6
8822
6
      // If X has a different width than the result/lhs, sext it or truncate it.
8823
6
      unsigned VTWidth = VT.getSizeInBits();
8824
6
      if (
OrigXWidth < VTWidth6
) {
8825
0
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8826
0
        AddToWorklist(X.getNode());
8827
6
      } else 
if (6
OrigXWidth > VTWidth6
) {
8828
0
        // To get the sign bit in the right place, we have to shift it right
8829
0
        // before truncating.
8830
0
        SDLoc DL(X);
8831
0
        X = DAG.getNode(ISD::SRL, DL,
8832
0
                        X.getValueType(), X,
8833
0
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
8834
0
                                        X.getValueType()));
8835
0
        AddToWorklist(X.getNode());
8836
0
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8837
0
        AddToWorklist(X.getNode());
8838
0
      }
8839
6
8840
6
      if (
N0.getValueType() == MVT::ppcf128 && 6
!LegalTypes5
) {
8841
5
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
8842
5
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8843
5
        AddToWorklist(Cst.getNode());
8844
5
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8845
5
        AddToWorklist(X.getNode());
8846
5
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8847
5
        AddToWorklist(XorResult.getNode());
8848
5
        SDValue XorResult64 = DAG.getNode(
8849
5
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8850
5
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8851
5
                                  SDLoc(XorResult)));
8852
5
        AddToWorklist(XorResult64.getNode());
8853
5
        SDValue FlipBit =
8854
5
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8855
5
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8856
5
        AddToWorklist(FlipBit.getNode());
8857
5
        SDValue FlipBits =
8858
5
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8859
5
        AddToWorklist(FlipBits.getNode());
8860
5
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8861
5
      }
8862
1
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8863
1
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8864
1
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
8865
1
      AddToWorklist(X.getNode());
8866
1
8867
1
      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8868
1
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8869
1
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8870
1
      AddToWorklist(Cst.getNode());
8871
1
8872
1
      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8873
1
    }
8874
6
  }
8875
365k
8876
365k
  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8877
365k
  
if (365k
N0.getOpcode() == ISD::BUILD_PAIR365k
)
8878
3.36k
    
if (SDValue 3.36k
CombineLD3.36k
= CombineConsecutiveLoads(N0.getNode(), VT))
8879
38
      return CombineLD;
8880
365k
8881
365k
  // Remove double bitcasts from shuffles - this is often a legacy of
8882
365k
  // XformToShuffleWithZero being used to combine bitmaskings (of
8883
365k
  // float vectors bitcast to integer vectors) into shuffles.
8884
365k
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8885
365k
  
if (365k
Level < AfterLegalizeDAG && 365k
TLI.isTypeLegal(VT)128k
&&
VT.isVector()119k
&&
8886
89.5k
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8887
10.4k
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8888
365k
      
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())678
) {
8889
678
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8890
678
8891
678
    // If operands are a bitcast, peek through if it casts the original VT.
8892
678
    // If operands are a constant, just bitcast back to original VT.
8893
1.35k
    auto PeekThroughBitcast = [&](SDValue Op) {
8894
1.35k
      if (Op.getOpcode() == ISD::BITCAST &&
8895
721
          Op.getOperand(0).getValueType() == VT)
8896
271
        return SDValue(Op.getOperand(0));
8897
1.08k
      
if (1.08k
Op.isUndef() || 1.08k
ISD::isBuildVectorOfConstantSDNodes(Op.getNode())778
||
8898
766
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8899
326
        return DAG.getBitcast(VT, Op);
8900
759
      return SDValue();
8901
759
    };
8902
678
8903
678
    // FIXME: If either input vector is bitcast, try to convert the shuffle to
8904
678
    // the result type of this bitcast. This would eliminate at least one
8905
678
    // bitcast. See the transform in InstCombine.
8906
678
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8907
678
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8908
678
    if (
!(SV0 && 678
SV1153
))
8909
530
      return SDValue();
8910
148
8911
148
    int MaskScale =
8912
148
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8913
148
    SmallVector<int, 8> NewMask;
8914
148
    for (int M : SVN->getMask())
8915
1.89k
      
for (int i = 0; 598
i != MaskScale1.89k
;
++i1.29k
)
8916
1.29k
        
NewMask.push_back(M < 0 ? 1.29k
-120
:
M * MaskScale + i1.27k
);
8917
148
8918
148
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8919
148
    if (
!LegalMask148
) {
8920
0
      std::swap(SV0, SV1);
8921
0
      ShuffleVectorSDNode::commuteMask(NewMask);
8922
0
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8923
0
    }
8924
148
8925
148
    if (LegalMask)
8926
148
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8927
364k
  }
8928
364k
8929
364k
  return SDValue();
8930
364k
}
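The (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) rewrite above can be checked on the bit level with plain C++ (illustration only; invented values, IEEE-754 binary32 assumed):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  float X = -2.25f;
  uint32_t Bits, AbsBits;
  std::memcpy(&Bits, &X, sizeof Bits);
  float A = std::fabs(X);
  std::memcpy(&AbsBits, &A, sizeof AbsBits);
  assert(AbsBits == (Bits & 0x7fffffffu));   // (and (bitconvert x), (not signbit))
  return 0;
}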
8931
8932
25.6k
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8933
25.6k
  EVT VT = N->getValueType(0);
8934
25.6k
  return CombineConsecutiveLoads(N, VT);
8935
25.6k
}
8936
8937
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8938
/// operands. DstEltVT indicates the destination element value type.
8939
SDValue DAGCombiner::
8940
502
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8941
502
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8942
502
8943
502
  // If this is already the right type, we're done.
8944
502
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8945
502
8946
502
  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8947
502
  unsigned DstBitSize = DstEltVT.getSizeInBits();
8948
502
8949
502
  // If this is a conversion of N elements of one type to N elements of another
8950
502
  // type, convert each element.  This handles FP<->INT cases.
8951
502
  if (
SrcBitSize == DstBitSize502
) {
8952
148
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8953
148
                              BV->getValueType(0).getVectorNumElements());
8954
148
8955
148
    // Due to the FP element handling below calling this routine recursively,
8956
148
    // we can end up with a scalar-to-vector node here.
8957
148
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8958
0
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8959
0
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8960
148
8961
148
    SmallVector<SDValue, 8> Ops;
8962
451
    for (SDValue Op : BV->op_values()) {
8963
451
      // If the vector element type is not legal, the BUILD_VECTOR operands
8964
451
      // are promoted and implicitly truncated.  Make that explicit here.
8965
451
      if (Op.getValueType() != SrcEltVT)
8966
0
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8967
451
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8968
451
      AddToWorklist(Ops.back().getNode());
8969
451
    }
8970
148
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8971
148
  }
8972
354
8973
354
  // Otherwise, we're growing or shrinking the elements.  To avoid having to
8974
354
  // handle annoying details of growing/shrinking FP values, we convert them to
8975
354
  // int first.
8976
354
  
if (354
SrcEltVT.isFloatingPoint()354
) {
8977
43
    // Convert the input float vector to a int vector where the elements are the
8978
43
    // same sizes.
8979
43
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8980
43
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8981
43
    SrcEltVT = IntVT;
8982
43
  }
8983
354
8984
354
  // Now we know the input is an integer vector.  If the output is a FP type,
8985
354
  // convert to integer first, then to FP of the right size.
8986
354
  if (
DstEltVT.isFloatingPoint()354
) {
8987
41
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8988
41
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8989
41
8990
41
    // Next, convert to FP elements of the same size.
8991
41
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8992
41
  }
8993
313
8994
313
  SDLoc DL(BV);
8995
313
8996
313
  // Okay, we know the src/dst types are both integers of differing types.
8997
313
  // Handling growing first.
8998
313
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8999
313
  if (
SrcBitSize < DstBitSize313
) {
9000
208
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
9001
208
9002
208
    SmallVector<SDValue, 8> Ops;
9003
788
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
9004
580
         
i += NumInputsPerOutput580
) {
9005
580
      bool isLE = DAG.getDataLayout().isLittleEndian();
9006
580
      APInt NewBits = APInt(DstBitSize, 0);
9007
580
      bool EltIsUndef = true;
9008
2.18k
      for (unsigned j = 0; 
j != NumInputsPerOutput2.18k
;
++j1.60k
) {
9009
1.60k
        // Shift the previously computed bits over.
9010
1.60k
        NewBits <<= SrcBitSize;
9011
1.60k
        SDValue Op = BV->getOperand(i+ (isLE ? 
(NumInputsPerOutput-j-1)1.14k
:
j468
));
9012
1.60k
        if (
Op.isUndef()1.60k
)
continue146
;
9013
1.46k
        EltIsUndef = false;
9014
1.46k
9015
1.46k
        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
9016
1.46k
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
9017
1.46k
      }
9018
580
9019
580
      if (EltIsUndef)
9020
12
        Ops.push_back(DAG.getUNDEF(DstEltVT));
9021
580
      else
9022
568
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
9023
580
    }
9024
208
9025
208
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
9026
208
    return DAG.getBuildVector(VT, DL, Ops);
9027
208
  }
9028
105
9029
105
  // Finally, this must be the case where we are shrinking elements: each input
9030
105
  // turns into multiple outputs.
9031
105
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
9032
105
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9033
105
                            NumOutputsPerInput*BV->getNumOperands());
9034
105
  SmallVector<SDValue, 8> Ops;
9035
105
9036
458
  for (const SDValue &Op : BV->op_values()) {
9037
458
    if (
Op.isUndef()458
) {
9038
10
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
9039
10
      continue;
9040
10
    }
9041
448
9042
448
    APInt OpVal = cast<ConstantSDNode>(Op)->
9043
448
                  getAPIntValue().zextOrTrunc(SrcBitSize);
9044
448
9045
1.82k
    for (unsigned j = 0; 
j != NumOutputsPerInput1.82k
;
++j1.38k
) {
9046
1.38k
      APInt ThisVal = OpVal.trunc(DstBitSize);
9047
1.38k
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
9048
1.38k
      OpVal.lshrInPlace(DstBitSize);
9049
1.38k
    }
9050
448
9051
448
    // For big endian targets, swap the order of the pieces of each element.
9052
448
    if (DAG.getDataLayout().isBigEndian())
9053
32
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
9054
458
  }
9055
502
9056
502
  return DAG.getBuildVector(VT, DL, Ops);
9057
502
}
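The element-shrinking path above peels an output piece per trunc and then shifts with lshrInPlace; a scalar picture of one i32 element becoming two i16 pieces (illustration only, invented constant; on big-endian targets the two pieces are then swapped, as the code does with std::reverse):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Src = 0xAABBCCDDu;
  uint16_t Piece0 = static_cast<uint16_t>(Src);        // OpVal.trunc(DstBitSize)
  uint16_t Piece1 = static_cast<uint16_t>(Src >> 16);  // after lshrInPlace(16)
  assert(Piece0 == 0xCCDD && Piece1 == 0xAABB);
  return 0;
}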
9058
9059
141k
static bool isContractable(SDNode *N) {
9060
141k
  SDNodeFlags F = N->getFlags();
9061
141k
  return F.hasAllowContract() || F.hasUnsafeAlgebra();
9062
141k
}
9063
9064
/// Try to perform FMA combining on a given FADD node.
9065
141k
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
9066
141k
  SDValue N0 = N->getOperand(0);
9067
141k
  SDValue N1 = N->getOperand(1);
9068
141k
  EVT VT = N->getValueType(0);
9069
141k
  SDLoc SL(N);
9070
141k
9071
141k
  const TargetOptions &Options = DAG.getTarget().Options;
9072
141k
9073
141k
  // Floating-point multiply-add with intermediate rounding.
9074
64.7k
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9075
141k
9076
141k
  // Floating-point multiply-add without intermediate rounding.
9077
141k
  bool HasFMA =
9078
141k
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9079
121k
      
(!LegalOperations || 121k
TLI.isOperationLegalOrCustom(ISD::FMA, VT)54.6k
);
9080
141k
9081
141k
  // No valid opcode, do not combine.
9082
141k
  if (
!HasFMAD && 141k
!HasFMA137k
)
9083
15.9k
    return SDValue();
9084
125k
9085
125k
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9086
125k
                              
Options.UnsafeFPMath125k
||
HasFMAD124k
);
9087
125k
  // If the addition is not contractable, do not combine.
9088
125k
  if (
!AllowFusionGlobally && 125k
!isContractable(N)119k
)
9089
119k
    return SDValue();
9090
6.01k
9091
6.01k
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9092
6.01k
  if (
STI && 6.01k
STI->generateFMAsInMachineCombiner(OptLevel)6.01k
)
9093
56
    return SDValue();
9094
5.96k
9095
5.96k
  // Always prefer FMAD to FMA for precision.
9096
5.96k
  
unsigned PreferredFusedOpcode = HasFMAD ? 5.96k
ISD::FMAD4.43k
:
ISD::FMA1.52k
;
9097
5.96k
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9098
5.96k
  bool LookThroughFPExt = TLI.isFPExtFree(VT);
9099
5.96k
9100
5.96k
  // Is the node an FMUL and contractable either due to global flags or
9101
5.96k
  // SDNodeFlags.
9102
14.1k
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9103
14.1k
    if (N.getOpcode() != ISD::FMUL)
9104
10.9k
      return false;
9105
3.27k
    
return AllowFusionGlobally || 3.27k
isContractable(N.getNode())15
;
9106
14.1k
  };
9107
5.96k
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
9108
5.96k
  // prefer to fold the multiply with fewer uses.
9109
5.96k
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
9110
204
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
9111
3
      std::swap(N0, N1);
9112
204
  }
9113
5.96k
9114
5.96k
  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
9115
5.96k
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9116
1.63k
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
9117
1.63k
                       N0.getOperand(0), N0.getOperand(1), N1);
9118
1.63k
  }
9119
4.32k
9120
4.32k
  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
9121
4.32k
  // Note: Commutes FADD operands.
9122
4.32k
  
if (4.32k
isContractableFMUL(N1) && 4.32k
(Aggressive || 266
N1->hasOneUse()157
)) {
9123
239
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
9124
239
                       N1.getOperand(0), N1.getOperand(1), N0);
9125
239
  }
9126
4.08k
9127
4.08k
  // Look through FP_EXTEND nodes to do more combining.
9128
4.08k
  
if (4.08k
LookThroughFPExt4.08k
) {
9129
214
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
9130
214
    if (
N0.getOpcode() == ISD::FP_EXTEND214
) {
9131
10
      SDValue N00 = N0.getOperand(0);
9132
10
      if (isContractableFMUL(N00))
9133
2
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9134
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9135
2
                                       N00.getOperand(0)),
9136
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9137
2
                                       N00.getOperand(1)), N1);
9138
212
    }
9139
212
9140
212
    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
9141
212
    // Note: Commutes FADD operands.
9142
212
    
if (212
N1.getOpcode() == ISD::FP_EXTEND212
) {
9143
10
      SDValue N10 = N1.getOperand(0);
9144
10
      if (isContractableFMUL(N10))
9145
2
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9146
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9147
2
                                       N10.getOperand(0)),
9148
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9149
2
                                       N10.getOperand(1)), N0);
9150
4.07k
    }
9151
214
  }
9152
4.07k
9153
4.07k
  // More folding opportunities when target permits.
9154
4.07k
  
if (4.07k
Aggressive4.07k
) {
9155
1.58k
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
9156
1.58k
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9157
1.58k
    // are currently only supported on binary nodes.
9158
1.58k
    if (Options.UnsafeFPMath &&
9159
248
        N0.getOpcode() == PreferredFusedOpcode &&
9160
17
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
9161
1.58k
        
N0->hasOneUse()15
&&
N0.getOperand(2)->hasOneUse()11
) {
9162
7
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
9163
7
                         N0.getOperand(0), N0.getOperand(1),
9164
7
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
9165
7
                                     N0.getOperand(2).getOperand(0),
9166
7
                                     N0.getOperand(2).getOperand(1),
9167
7
                                     N1));
9168
7
    }
9169
1.57k
9170
1.57k
    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
9171
1.57k
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9172
1.57k
    // are currently only supported on binary nodes.
9173
1.57k
    
if (1.57k
Options.UnsafeFPMath &&
9174
241
        N1->getOpcode() == PreferredFusedOpcode &&
9175
20
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
9176
1.57k
        
N1->hasOneUse()10
&&
N1.getOperand(2)->hasOneUse()6
) {
9177
2
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
9178
2
                         N1.getOperand(0), N1.getOperand(1),
9179
2
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
9180
2
                                     N1.getOperand(2).getOperand(0),
9181
2
                                     N1.getOperand(2).getOperand(1),
9182
2
                                     N0));
9183
2
    }
9184
1.57k
9185
1.57k
    
if (1.57k
LookThroughFPExt1.57k
) {
9186
206
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
9187
206
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
9188
206
      auto FoldFAddFMAFPExtFMul = [&] (
9189
8
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9190
8
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
9191
8
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
9192
8
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9193
8
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9194
8
                                       Z));
9195
8
      };
9196
206
      if (
N0.getOpcode() == PreferredFusedOpcode206
) {
9197
18
        SDValue N02 = N0.getOperand(2);
9198
18
        if (
N02.getOpcode() == ISD::FP_EXTEND18
) {
9199
4
          SDValue N020 = N02.getOperand(0);
9200
4
          if (isContractableFMUL(N020))
9201
4
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9202
4
                                        N020.getOperand(0), N020.getOperand(1),
9203
4
                                        N1);
9204
202
        }
9205
18
      }
9206
202
9207
202
      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9208
202
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9209
202
      // FIXME: This turns two single-precision and one double-precision
9210
202
      // operation into two double-precision operations, which might not be
9211
202
      // interesting for all targets, especially GPUs.
9212
202
      auto FoldFAddFPExtFMAFMul = [&] (
9213
8
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9214
8
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9215
8
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9216
8
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9217
8
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
9218
8
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9219
8
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9220
8
                                       Z));
9221
8
      };
9222
202
      if (
N0.getOpcode() == ISD::FP_EXTEND202
) {
9223
8
        SDValue N00 = N0.getOperand(0);
9224
8
        if (
N00.getOpcode() == PreferredFusedOpcode8
) {
9225
4
          SDValue N002 = N00.getOperand(2);
9226
4
          if (isContractableFMUL(N002))
9227
4
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9228
4
                                        N002.getOperand(0), N002.getOperand(1),
9229
4
                                        N1);
9230
198
        }
9231
8
      }
9232
198
9233
198
      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
9234
198
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
9235
198
      
if (198
N1.getOpcode() == PreferredFusedOpcode198
) {
9236
16
        SDValue N12 = N1.getOperand(2);
9237
16
        if (
N12.getOpcode() == ISD::FP_EXTEND16
) {
9238
4
          SDValue N120 = N12.getOperand(0);
9239
4
          if (isContractableFMUL(N120))
9240
4
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9241
4
                                        N120.getOperand(0), N120.getOperand(1),
9242
4
                                        N0);
9243
194
        }
9244
16
      }
9245
194
9246
194
      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
9247
194
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9248
194
      // FIXME: This turns two single-precision and one double-precision
9249
194
      // operation into two double-precision operations, which might not be
9250
194
      // interesting for all targets, especially GPUs.
9251
194
      
if (194
N1.getOpcode() == ISD::FP_EXTEND194
) {
9252
8
        SDValue N10 = N1.getOperand(0);
9253
8
        if (
N10.getOpcode() == PreferredFusedOpcode8
) {
9254
4
          SDValue N102 = N10.getOperand(2);
9255
4
          if (isContractableFMUL(N102))
9256
4
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9257
4
                                        N102.getOperand(0), N102.getOperand(1),
9258
4
                                        N0);
9259
4.05k
        }
9260
8
      }
9261
206
    }
9262
1.58k
  }
9263
4.05k
9264
4.05k
  return SDValue();
9265
4.05k
}
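Contracting fadd(fmul(x, y), z) into a fused multiply-add removes the intermediate rounding of the product, so the result can differ from the separate operations; that is why the combine above is gated on the contract/unsafe-math flags. A small standalone demonstration (illustration only; invented values, and std::fma stands in for the ISD::FMA/FMAD node that gets formed):

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0 + 0x1p-30, Y = 1.0 + 0x1p-29, Z = -(X * Y);
  double Separate = X * Y + Z;        // product rounded before the add: 0.0
  double Fused = std::fma(X, Y, Z);   // single rounding keeps the low bits: 0x1p-59
  std::printf("separate = %a, fused = %a\n", Separate, Fused);
  return 0;
}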
9266
9267
/// Try to perform FMA combining on a given FSUB node.
9268
28.5k
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9269
28.5k
  SDValue N0 = N->getOperand(0);
9270
28.5k
  SDValue N1 = N->getOperand(1);
9271
28.5k
  EVT VT = N->getValueType(0);
9272
28.5k
  SDLoc SL(N);
9273
28.5k
9274
28.5k
  const TargetOptions &Options = DAG.getTarget().Options;
9275
28.5k
  // Floating-point multiply-add with intermediate rounding.
9276
12.7k
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9277
28.5k
9278
28.5k
  // Floating-point multiply-add without intermediate rounding.
9279
28.5k
  bool HasFMA =
9280
28.5k
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9281
22.8k
      
(!LegalOperations || 22.8k
TLI.isOperationLegalOrCustom(ISD::FMA, VT)10.2k
);
9282
28.5k
9283
28.5k
  // No valid opcode, do not combine.
9284
28.5k
  if (
!HasFMAD && 28.5k
!HasFMA27.8k
)
9285
5.01k
    return SDValue();
9286
23.5k
9287
23.5k
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9288
23.5k
                              
Options.UnsafeFPMath22.6k
||
HasFMAD22.2k
);
9289
23.5k
  // If the subtraction is not contractable, do not combine.
9290
23.5k
  if (
!AllowFusionGlobally && 23.5k
!isContractable(N)21.6k
)
9291
21.6k
    return SDValue();
9292
1.87k
9293
1.87k
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9294
1.87k
  if (
STI && 1.87k
STI->generateFMAsInMachineCombiner(OptLevel)1.87k
)
9295
58
    return SDValue();
9296
1.82k
9297
1.82k
  // Always prefer FMAD to FMA for precision.
9298
1.82k
  
unsigned PreferredFusedOpcode = HasFMAD ? 1.82k
ISD::FMAD707
:
ISD::FMA1.11k
;
9299
1.82k
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9300
1.82k
  bool LookThroughFPExt = TLI.isFPExtFree(VT);
9301
1.82k
9302
1.82k
  // Is the node an FMUL and contractable either due to global flags or
9303
1.82k
  // SDNodeFlags.
9304
3.53k
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9305
3.53k
    if (N.getOpcode() != ISD::FMUL)
9306
2.57k
      return false;
9307
958
    
return AllowFusionGlobally || 958
isContractable(N.getNode())5
;
9308
3.53k
  };
9309
1.82k
9310
1.82k
  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9311
1.82k
  if (
isContractableFMUL(N0) && 1.82k
(Aggressive || 335
N0->hasOneUse()168
)) {
9312
283
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
9313
283
                       N0.getOperand(0), N0.getOperand(1),
9314
283
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
9315
283
  }
9316
1.53k
9317
1.53k
  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9318
1.53k
  // Note: Commutes FSUB operands.
9319
1.53k
  
if (1.53k
isContractableFMUL(N1) && 1.53k
(Aggressive || 492
N1->hasOneUse()261
))
9320
468
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
9321
468
                       DAG.getNode(ISD::FNEG, SL, VT,
9322
468
                                   N1.getOperand(0)),
9323
468
                       N1.getOperand(1), N0);
9324
1.06k
9325
1.06k
  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9326
1.06k
  
if (1.06k
N0.getOpcode() == ISD::FNEG && 1.06k
isContractableFMUL(N0.getOperand(0))112
&&
9327
1.06k
      
(Aggressive || 86
(N0->hasOneUse() && 66
N0.getOperand(0).hasOneUse()66
))) {
9328
86
    SDValue N00 = N0.getOperand(0).getOperand(0);
9329
86
    SDValue N01 = N0.getOperand(0).getOperand(1);
9330
86
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
9331
86
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9332
86
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
9333
86
  }
9334
983
9335
983
  // Look through FP_EXTEND nodes to do more combining.
9336
983
  
if (983
LookThroughFPExt983
) {
9337
62
    // fold (fsub (fpext (fmul x, y)), z)
9338
62
    //   -> (fma (fpext x), (fpext y), (fneg z))
9339
62
    if (
N0.getOpcode() == ISD::FP_EXTEND62
) {
9340
12
      SDValue N00 = N0.getOperand(0);
9341
12
      if (isContractableFMUL(N00))
9342
2
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9343
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9344
2
                                       N00.getOperand(0)),
9345
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9346
2
                                       N00.getOperand(1)),
9347
2
                           DAG.getNode(ISD::FNEG, SL, VT, N1));
9348
60
    }
9349
60
9350
60
    // fold (fsub x, (fpext (fmul y, z)))
9351
60
    //   -> (fma (fneg (fpext y)), (fpext z), x)
9352
60
    // Note: Commutes FSUB operands.
9353
60
    
if (60
N1.getOpcode() == ISD::FP_EXTEND60
) {
9354
10
      SDValue N10 = N1.getOperand(0);
9355
10
      if (isContractableFMUL(N10))
9356
2
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9357
2
                           DAG.getNode(ISD::FNEG, SL, VT,
9358
2
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
9359
2
                                                   N10.getOperand(0))),
9360
2
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9361
2
                                       N10.getOperand(1)),
9362
2
                           N0);
9363
58
    }
9364
58
9365
58
    // fold (fsub (fpext (fneg (fmul, x, y))), z)
9366
58
    //   -> (fneg (fma (fpext x), (fpext y), z))
9367
58
    // Note: This could be removed with appropriate canonicalization of the
9368
58
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9369
58
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9370
58
    // from implementing the canonicalization in visitFSUB.
9371
58
    
if (58
N0.getOpcode() == ISD::FP_EXTEND58
) {
9372
10
      SDValue N00 = N0.getOperand(0);
9373
10
      if (
N00.getOpcode() == ISD::FNEG10
) {
9374
2
        SDValue N000 = N00.getOperand(0);
9375
2
        if (
isContractableFMUL(N000)2
) {
9376
2
          return DAG.getNode(ISD::FNEG, SL, VT,
9377
2
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
9378
2
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9379
2
                                                     N000.getOperand(0)),
9380
2
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9381
2
                                                     N000.getOperand(1)),
9382
2
                                         N1));
9383
2
        }
9384
56
      }
9385
10
    }
9386
56
9387
56
    // fold (fsub (fneg (fpext (fmul, x, y))), z)
9388
56
    //   -> (fneg (fma (fpext x)), (fpext y), z)
9389
56
    // Note: This could be removed with appropriate canonicalization of the
9390
56
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9391
56
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9392
56
    // from implementing the canonicalization in visitFSUB.
9393
56
    
if (56
N0.getOpcode() == ISD::FNEG56
) {
9394
2
      SDValue N00 = N0.getOperand(0);
9395
2
      if (
N00.getOpcode() == ISD::FP_EXTEND2
) {
9396
2
        SDValue N000 = N00.getOperand(0);
9397
2
        if (
isContractableFMUL(N000)2
) {
9398
2
          return DAG.getNode(ISD::FNEG, SL, VT,
9399
2
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
9400
2
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9401
2
                                                     N000.getOperand(0)),
9402
2
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9403
2
                                                     N000.getOperand(1)),
9404
2
                                         N1));
9405
2
        }
9406
975
      }
9407
2
    }
9408
62
9409
62
  }
9410
975
9411
975
  // More folding opportunities when target permits.
9412
975
  
if (975
Aggressive975
) {
9413
605
    // fold (fsub (fma x, y, (fmul u, v)), z)
9414
605
    //   -> (fma x, y (fma u, v, (fneg z)))
9415
605
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9416
605
    // are currently only supported on binary nodes.
9417
605
    if (
Options.UnsafeFPMath && 605
N0.getOpcode() == PreferredFusedOpcode91
&&
9418
605
        
isContractableFMUL(N0.getOperand(2))17
&&
N0->hasOneUse()15
&&
9419
605
        
N0.getOperand(2)->hasOneUse()11
) {
9420
7
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
9421
7
                         N0.getOperand(0), N0.getOperand(1),
9422
7
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
9423
7
                                     N0.getOperand(2).getOperand(0),
9424
7
                                     N0.getOperand(2).getOperand(1),
9425
7
                                     DAG.getNode(ISD::FNEG, SL, VT,
9426
7
                                                 N1)));
9427
7
    }
9428
598
9429
598
    // fold (fsub x, (fma y, z, (fmul u, v)))
9430
598
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9431
598
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9432
598
    // are currently only supported on binary nodes.
9433
598
    
if (598
Options.UnsafeFPMath && 598
N1.getOpcode() == PreferredFusedOpcode84
&&
9434
598
        
isContractableFMUL(N1.getOperand(2))6
) {
9435
4
      SDValue N20 = N1.getOperand(2).getOperand(0);
9436
4
      SDValue N21 = N1.getOperand(2).getOperand(1);
9437
4
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
9438
4
                         DAG.getNode(ISD::FNEG, SL, VT,
9439
4
                                     N1.getOperand(0)),
9440
4
                         N1.getOperand(1),
9441
4
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
9442
4
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
9443
4
9444
4
                                     N21, N0));
9445
4
    }
9446
594
9447
594
    
if (594
LookThroughFPExt594
) {
9448
50
      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9449
50
      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9450
50
      if (
N0.getOpcode() == PreferredFusedOpcode50
) {
9451
8
        SDValue N02 = N0.getOperand(2);
9452
8
        if (
N02.getOpcode() == ISD::FP_EXTEND8
) {
9453
4
          SDValue N020 = N02.getOperand(0);
9454
4
          if (isContractableFMUL(N020))
9455
4
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
9456
4
                               N0.getOperand(0), N0.getOperand(1),
9457
4
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
9458
4
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9459
4
                                                       N020.getOperand(0)),
9460
4
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9461
4
                                                       N020.getOperand(1)),
9462
4
                                           DAG.getNode(ISD::FNEG, SL, VT,
9463
4
                                                       N1)));
9464
46
        }
9465
8
      }
9466
46
9467
46
      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9468
46
      //   -> (fma (fpext x), (fpext y),
9469
46
      //           (fma (fpext u), (fpext v), (fneg z)))
9470
46
      // FIXME: This turns two single-precision and one double-precision
9471
46
      // operation into two double-precision operations, which might not be
9472
46
      // interesting for all targets, especially GPUs.
9473
46
      
if (46
N0.getOpcode() == ISD::FP_EXTEND46
) {
9474
8
        SDValue N00 = N0.getOperand(0);
9475
8
        if (
N00.getOpcode() == PreferredFusedOpcode8
) {
9476
4
          SDValue N002 = N00.getOperand(2);
9477
4
          if (isContractableFMUL(N002))
9478
4
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
9479
4
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
9480
4
                                           N00.getOperand(0)),
9481
4
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
9482
4
                                           N00.getOperand(1)),
9483
4
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
9484
4
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9485
4
                                                       N002.getOperand(0)),
9486
4
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
9487
4
                                                       N002.getOperand(1)),
9488
4
                                           DAG.getNode(ISD::FNEG, SL, VT,
9489
4
                                                       N1)));
9490
42
        }
9491
8
      }
9492
42
9493
42
      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9494
42
      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9495
42
      
if (42
N1.getOpcode() == PreferredFusedOpcode &&
9496
42
        
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND8
) {
9497
4
        SDValue N120 = N1.getOperand(2).getOperand(0);
9498
4
        if (
isContractableFMUL(N120)4
) {
9499
4
          SDValue N1200 = N120.getOperand(0);
9500
4
          SDValue N1201 = N120.getOperand(1);
9501
4
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
9502
4
                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9503
4
                             N1.getOperand(1),
9504
4
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
9505
4
                                         DAG.getNode(ISD::FNEG, SL, VT,
9506
4
                                             DAG.getNode(ISD::FP_EXTEND, SL,
9507
4
                                                         VT, N1200)),
9508
4
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9509
4
                                                     N1201),
9510
4
                                         N0));
9511
4
        }
9512
38
      }
9513
38
9514
38
      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9515
38
      //   -> (fma (fneg (fpext y)), (fpext z),
9516
38
      //           (fma (fneg (fpext u)), (fpext v), x))
9517
38
      // FIXME: This turns two single-precision and one double-precision
9518
38
      // operation into two double-precision operations, which might not be
9519
38
      // interesting for all targets, especially GPUs.
9520
38
      
if (38
N1.getOpcode() == ISD::FP_EXTEND &&
9521
38
        
N1.getOperand(0).getOpcode() == PreferredFusedOpcode8
) {
9522
4
        SDValue N100 = N1.getOperand(0).getOperand(0);
9523
4
        SDValue N101 = N1.getOperand(0).getOperand(1);
9524
4
        SDValue N102 = N1.getOperand(0).getOperand(2);
9525
4
        if (
isContractableFMUL(N102)4
) {
9526
4
          SDValue N1020 = N102.getOperand(0);
9527
4
          SDValue N1021 = N102.getOperand(1);
9528
4
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
9529
4
                             DAG.getNode(ISD::FNEG, SL, VT,
9530
4
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9531
4
                                                     N100)),
9532
4
                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9533
4
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
9534
4
                                         DAG.getNode(ISD::FNEG, SL, VT,
9535
4
                                             DAG.getNode(ISD::FP_EXTEND, SL,
9536
4
                                                         VT, N1020)),
9537
4
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
9538
4
                                                     N1021),
9539
4
                                         N0));
9540
4
        }
9541
948
      }
9542
50
    }
9543
605
  }
9544
948
9545
948
  return SDValue();
9546
948
}
9547
9548
/// Try to perform FMA combining on a given FMUL node based on the distributive
9549
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9550
/// subtraction instead of addition).
9551
123k
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9552
123k
  SDValue N0 = N->getOperand(0);
9553
123k
  SDValue N1 = N->getOperand(1);
9554
123k
  EVT VT = N->getValueType(0);
9555
123k
  SDLoc SL(N);
9556
123k
9557
123k
  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9558
123k
9559
123k
  const TargetOptions &Options = DAG.getTarget().Options;
9560
123k
9561
123k
  // The transforms below are incorrect when x == 0 and y == inf, because the
9562
123k
  // intermediate multiplication produces a nan.
9563
123k
  if (!Options.NoInfsFPMath)
9564
122k
    return SDValue();
9565
451
9566
451
  // Floating-point multiply-add without intermediate rounding.
9567
451
  bool HasFMA =
9568
226
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9569
451
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9570
326
      
(!LegalOperations || 326
TLI.isOperationLegalOrCustom(ISD::FMA, VT)74
);
9571
451
9572
451
  // Floating-point multiply-add with intermediate rounding. This can result
9573
451
  // in a less precise result due to the changed rounding order.
9574
451
  bool HasFMAD = Options.UnsafeFPMath &&
9575
323
                 
(LegalOperations && 323
TLI.isOperationLegal(ISD::FMAD, VT)125
);
9576
451
9577
451
  // No valid opcode, do not combine.
9578
451
  if (
!HasFMAD && 451
!HasFMA451
)
9579
125
    return SDValue();
9580
326
9581
326
  // Always prefer FMAD to FMA for precision.
9582
326
  
unsigned PreferredFusedOpcode = HasFMAD ? 326
ISD::FMAD0
:
ISD::FMA326
;
9583
326
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9584
326
9585
326
  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9586
326
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9587
634
  auto FuseFADD = [&](SDValue X, SDValue Y) {
9588
634
    if (
X.getOpcode() == ISD::FADD && 634
(Aggressive || 47
X->hasOneUse()42
)) {
9589
47
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9590
47
      if (
XC1 && 47
XC1->isExactlyValue(+1.0)37
)
9591
18
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9592
29
      
if (29
XC1 && 29
XC1->isExactlyValue(-1.0)19
)
9593
18
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9594
18
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
9595
598
    }
9596
598
    return SDValue();
9597
598
  };
9598
326
9599
326
  if (SDValue FMA = FuseFADD(N0, N1))
9600
18
    return FMA;
9601
308
  
if (SDValue 308
FMA308
= FuseFADD(N1, N0))
9602
18
    return FMA;
9603
290
9604
290
  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9605
290
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9606
290
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9607
290
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9608
290
  
auto FuseFSUB = [&](SDValue X, SDValue Y) 290
{
9609
544
    if (
X.getOpcode() == ISD::FSUB && 544
(Aggressive || 108
X->hasOneUse()96
)) {
9610
108
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9611
108
      if (
XC0 && 108
XC0->isExactlyValue(+1.0)72
)
9612
52
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9613
52
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9614
52
                           Y);
9615
56
      
if (56
XC0 && 56
XC0->isExactlyValue(-1.0)20
)
9616
18
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
9617
18
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9618
18
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
9619
38
9620
38
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9621
38
      if (
XC1 && 38
XC1->isExactlyValue(+1.0)36
)
9622
18
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9623
18
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
9624
20
      
if (20
XC1 && 20
XC1->isExactlyValue(-1.0)18
)
9625
18
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9626
438
    }
9627
438
    return SDValue();
9628
438
  };
9629
290
9630
290
  if (SDValue FMA = FuseFSUB(N0, N1))
9631
36
    return FMA;
9632
254
  
if (SDValue 254
FMA254
= FuseFSUB(N1, N0))
9633
70
    return FMA;
9634
184
9635
184
  return SDValue();
9636
184
}
9637
9638
283k
static bool isFMulNegTwo(SDValue &N) {
9639
283k
  if (N.getOpcode() != ISD::FMUL)
9640
222k
    return false;
9641
61.0k
  
if (ConstantFPSDNode *61.0k
CFP61.0k
= isConstOrConstSplatFP(N.getOperand(1)))
9642
11.1k
    return CFP->isExactlyValue(-2.0);
9643
49.8k
  return false;
9644
49.8k
}
9645
9646
142k
SDValue DAGCombiner::visitFADD(SDNode *N) {
9647
142k
  SDValue N0 = N->getOperand(0);
9648
142k
  SDValue N1 = N->getOperand(1);
9649
142k
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9650
142k
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9651
142k
  EVT VT = N->getValueType(0);
9652
142k
  SDLoc DL(N);
9653
142k
  const TargetOptions &Options = DAG.getTarget().Options;
9654
142k
  const SDNodeFlags Flags = N->getFlags();
9655
142k
9656
142k
  // fold vector ops
9657
142k
  if (VT.isVector())
9658
27.1k
    
if (SDValue 27.1k
FoldedVOp27.1k
= SimplifyVBinOp(N))
9659
2
      return FoldedVOp;
9660
142k
9661
142k
  // fold (fadd c1, c2) -> c1 + c2
9662
142k
  
if (142k
N0CFP && 142k
N1CFP216
)
9663
3
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9664
142k
9665
142k
  // canonicalize constant to RHS
9666
142k
  
if (142k
N0CFP && 142k
!N1CFP213
)
9667
213
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9668
142k
9669
142k
  
if (SDValue 142k
NewSel142k
= foldBinOpIntoSelect(N))
9670
5
    return NewSel;
9671
142k
9672
142k
  // fold (fadd A, (fneg B)) -> (fsub A, B)
9673
142k
  
if (142k
(!LegalOperations || 142k
TLI.isOperationLegalOrCustom(ISD::FSUB, VT)64.7k
) &&
9674
139k
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9675
214
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
9676
214
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9677
142k
9678
142k
  // fold (fadd (fneg A), B) -> (fsub B, A)
9679
142k
  
if (142k
(!LegalOperations || 142k
TLI.isOperationLegalOrCustom(ISD::FSUB, VT)64.7k
) &&
9680
139k
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9681
256
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
9682
256
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9683
141k
9684
141k
  // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9685
141k
  // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9686
141k
  
if (141k
(isFMulNegTwo(N0) && 141k
N0.hasOneUse()14
) ||
9687
141k
      
(isFMulNegTwo(N1) && 141k
N1.hasOneUse()33
)) {
9688
45
    bool N1IsFMul = isFMulNegTwo(N1);
9689
45
    SDValue AddOp = N1IsFMul ? 
N1.getOperand(0)31
:
N0.getOperand(0)14
;
9690
45
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9691
45
    return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? 
N031
:
N114
, Add, Flags);
9692
45
  }
9693
141k
9694
141k
  // FIXME: Auto-upgrade the target/function-level option.
9695
141k
  
if (141k
Options.NoSignedZerosFPMath || 141k
N->getFlags().hasNoSignedZeros()141k
) {
9696
1.32k
    // fold (fadd A, 0) -> A
9697
1.32k
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9698
156
      
if (156
N1C->isZero()156
)
9699
14
        return N0;
9700
141k
  }
9701
141k
9702
141k
  // If 'unsafe math' is enabled, fold lots of things.
9703
141k
  
if (141k
Options.UnsafeFPMath141k
) {
9704
2.06k
    // No FP constant should be created after legalization as Instruction
9705
2.06k
    // Selection pass has a hard time dealing with FP constants.
9706
2.06k
    bool AllowNewConst = (Level < AfterLegalizeDAG);
9707
2.06k
9708
2.06k
    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9709
2.06k
    if (
N1CFP && 2.06k
N0.getOpcode() == ISD::FADD99
&&
N0.getNode()->hasOneUse()2
&&
9710
2
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9711
2
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9712
2
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9713
2
                                     Flags),
9714
2
                         Flags);
9715
2.05k
9716
2.05k
    // If allowed, fold (fadd (fneg x), x) -> 0.0
9717
2.05k
    
if (2.05k
AllowNewConst && 2.05k
N0.getOpcode() == ISD::FNEG1.31k
&&
N0.getOperand(0) == N10
)
9718
0
      return DAG.getConstantFP(0.0, DL, VT);
9719
2.05k
9720
2.05k
    // If allowed, fold (fadd x, (fneg x)) -> 0.0
9721
2.05k
    
if (2.05k
AllowNewConst && 2.05k
N1.getOpcode() == ISD::FNEG1.31k
&&
N1.getOperand(0) == N00
)
9722
0
      return DAG.getConstantFP(0.0, DL, VT);
9723
2.05k
9724
2.05k
    // We can fold chains of FADD's of the same value into multiplications.
9725
2.05k
    // This transform is not safe in general because we are reducing the number
9726
2.05k
    // of rounding steps.
9727
2.05k
    
if (2.05k
TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && 2.05k
!N0CFP2.00k
&&
!N1CFP2.00k
) {
9728
1.91k
      if (
N0.getOpcode() == ISD::FMUL1.91k
) {
9729
436
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9730
436
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9731
436
9732
436
        // (fadd (fmul x, c), x) -> (fmul x, c+1)
9733
436
        if (
CFP01 && 436
!CFP0054
&&
N0.getOperand(0) == N154
) {
9734
2
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9735
2
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
9736
2
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9737
2
        }
9738
434
9739
434
        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9740
434
        
if (434
CFP01 && 434
!CFP0052
&&
N1.getOpcode() == ISD::FADD52
&&
9741
4
            N1.getOperand(0) == N1.getOperand(1) &&
9742
434
            
N0.getOperand(0) == N1.getOperand(0)4
) {
9743
4
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9744
4
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
9745
4
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9746
4
        }
9747
1.90k
      }
9748
1.90k
9749
1.90k
      
if (1.90k
N1.getOpcode() == ISD::FMUL1.90k
) {
9750
581
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9751
581
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9752
581
9753
581
        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9754
581
        if (
CFP11 && 581
!CFP1018
&&
N1.getOperand(0) == N018
) {
9755
3
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9756
3
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
9757
3
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9758
3
        }
9759
578
9760
578
        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9761
578
        
if (578
CFP11 && 578
!CFP1015
&&
N0.getOpcode() == ISD::FADD15
&&
9762
4
            N0.getOperand(0) == N0.getOperand(1) &&
9763
578
            
N1.getOperand(0) == N0.getOperand(0)4
) {
9764
4
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9765
4
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
9766
4
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9767
4
        }
9768
1.89k
      }
9769
1.89k
9770
1.89k
      
if (1.89k
N0.getOpcode() == ISD::FADD && 1.89k
AllowNewConst406
) {
9771
236
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9772
236
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9773
236
        if (
!CFP00 && 236
N0.getOperand(0) == N0.getOperand(1)236
&&
9774
236
            
(N0.getOperand(0) == N1)32
) {
9775
6
          return DAG.getNode(ISD::FMUL, DL, VT,
9776
6
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9777
6
        }
9778
1.89k
      }
9779
1.89k
9780
1.89k
      
if (1.89k
N1.getOpcode() == ISD::FADD && 1.89k
AllowNewConst265
) {
9781
173
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9782
173
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9783
173
        if (
!CFP10 && 173
N1.getOperand(0) == N1.getOperand(1)173
&&
9784
173
            
N1.getOperand(0) == N05
) {
9785
2
          return DAG.getNode(ISD::FMUL, DL, VT,
9786
2
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9787
2
        }
9788
1.88k
      }
9789
1.88k
9790
1.88k
      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9791
1.88k
      
if (1.88k
AllowNewConst &&
9792
1.88k
          
N0.getOpcode() == ISD::FADD1.15k
&&
N1.getOpcode() == ISD::FADD230
&&
9793
17
          N0.getOperand(0) == N0.getOperand(1) &&
9794
3
          N1.getOperand(0) == N1.getOperand(1) &&
9795
1.88k
          
N0.getOperand(0) == N1.getOperand(0)3
) {
9796
3
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9797
3
                           DAG.getConstantFP(4.0, DL, VT), Flags);
9798
3
      }
9799
141k
    }
9800
2.06k
  } // enable-unsafe-fp-math
9801
141k
9802
141k
  // FADD -> FMA combines:
9803
141k
  
if (SDValue 141k
Fused141k
= visitFADDForFMACombine(N)) {
9804
1.90k
    AddToWorklist(Fused.getNode());
9805
1.90k
    return Fused;
9806
1.90k
  }
9807
139k
  return SDValue();
9808
139k
}
9809
9810
29.4k
SDValue DAGCombiner::visitFSUB(SDNode *N) {
9811
29.4k
  SDValue N0 = N->getOperand(0);
9812
29.4k
  SDValue N1 = N->getOperand(1);
9813
29.4k
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9814
29.4k
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9815
29.4k
  EVT VT = N->getValueType(0);
9816
29.4k
  SDLoc DL(N);
9817
29.4k
  const TargetOptions &Options = DAG.getTarget().Options;
9818
29.4k
  const SDNodeFlags Flags = N->getFlags();
9819
29.4k
9820
29.4k
  // fold vector ops
9821
29.4k
  if (VT.isVector())
9822
3.99k
    
if (SDValue 3.99k
FoldedVOp3.99k
= SimplifyVBinOp(N))
9823
1
      return FoldedVOp;
9824
29.4k
9825
29.4k
  // fold (fsub c1, c2) -> c1-c2
9826
29.4k
  
if (29.4k
N0CFP && 29.4k
N1CFP4.08k
)
9827
0
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9828
29.4k
9829
29.4k
  
if (SDValue 29.4k
NewSel29.4k
= foldBinOpIntoSelect(N))
9830
2
    return NewSel;
9831
29.4k
9832
29.4k
  // fold (fsub A, (fneg B)) -> (fadd A, B)
9833
29.4k
  
if (29.4k
isNegatibleForFree(N1, LegalOperations, TLI, &Options)29.4k
)
9834
847
    return DAG.getNode(ISD::FADD, DL, VT, N0,
9835
847
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9836
28.5k
9837
28.5k
  // FIXME: Auto-upgrade the target/function-level option.
9838
28.5k
  
if (28.5k
Options.NoSignedZerosFPMath || 28.5k
N->getFlags().hasNoSignedZeros()28.3k
) {
9839
721
    // (fsub 0, B) -> -B
9840
721
    if (
N0CFP && 721
N0CFP->isZero()266
) {
9841
20
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9842
0
        return GetNegatedExpression(N1, DAG, LegalOperations);
9843
20
      
if (20
!LegalOperations || 20
TLI.isOperationLegal(ISD::FNEG, VT)0
)
9844
20
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9845
28.5k
    }
9846
721
  }
9847
28.5k
9848
28.5k
  // If 'unsafe math' is enabled, fold lots of things.
9849
28.5k
  
if (28.5k
Options.UnsafeFPMath28.5k
) {
9850
826
    // (fsub A, 0) -> A
9851
826
    if (
N1CFP && 826
N1CFP->isZero()7
)
9852
1
      return N0;
9853
825
9854
825
    // (fsub x, x) -> 0.0
9855
825
    
if (825
N0 == N1825
)
9856
5
      return DAG.getConstantFP(0.0f, DL, VT);
9857
820
9858
820
    // (fsub x, (fadd x, y)) -> (fneg y)
9859
820
    // (fsub x, (fadd y, x)) -> (fneg y)
9860
820
    
if (820
N1.getOpcode() == ISD::FADD820
) {
9861
8
      SDValue N10 = N1->getOperand(0);
9862
8
      SDValue N11 = N1->getOperand(1);
9863
8
9864
8
      if (
N10 == N0 && 8
isNegatibleForFree(N11, LegalOperations, TLI, &Options)0
)
9865
0
        return GetNegatedExpression(N11, DAG, LegalOperations);
9866
8
9867
8
      
if (8
N11 == N0 && 8
isNegatibleForFree(N10, LegalOperations, TLI, &Options)0
)
9868
0
        return GetNegatedExpression(N10, DAG, LegalOperations);
9869
28.5k
    }
9870
826
  }
9871
28.5k
9872
28.5k
  // FSUB -> FMA combines:
9873
28.5k
  
if (SDValue 28.5k
Fused28.5k
= visitFSUBForFMACombine(N)) {
9874
872
    AddToWorklist(Fused.getNode());
9875
872
    return Fused;
9876
872
  }
9877
27.6k
9878
27.6k
  return SDValue();
9879
27.6k
}
9880
9881
123k
SDValue DAGCombiner::visitFMUL(SDNode *N) {
9882
123k
  SDValue N0 = N->getOperand(0);
9883
123k
  SDValue N1 = N->getOperand(1);
9884
123k
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9885
123k
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9886
123k
  EVT VT = N->getValueType(0);
9887
123k
  SDLoc DL(N);
9888
123k
  const TargetOptions &Options = DAG.getTarget().Options;
9889
123k
  const SDNodeFlags Flags = N->getFlags();
9890
123k
9891
123k
  // fold vector ops
9892
123k
  if (
VT.isVector()123k
) {
9893
18.0k
    // This just handles C1 * C2 for vectors. Other vector folds are below.
9894
18.0k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
9895
2
      return FoldedVOp;
9896
123k
  }
9897
123k
9898
123k
  // fold (fmul c1, c2) -> c1*c2
9899
123k
  
if (123k
N0CFP && 123k
N1CFP105
)
9900
0
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
9901
123k
9902
123k
  // canonicalize constant to RHS
9903
123k
  
if (123k
isConstantFPBuildVectorOrConstantFP(N0) &&
9904
179
     !isConstantFPBuildVectorOrConstantFP(N1))
9905
179
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
9906
123k
9907
123k
  // fold (fmul A, 1.0) -> A
9908
123k
  
if (123k
N1CFP && 123k
N1CFP->isExactlyValue(1.0)33.9k
)
9909
79
    return N0;
9910
123k
9911
123k
  
if (SDValue 123k
NewSel123k
= foldBinOpIntoSelect(N))
9912
3
    return NewSel;
9913
123k
9914
123k
  
if (123k
Options.UnsafeFPMath123k
) {
9915
2.95k
    // fold (fmul A, 0) -> 0
9916
2.95k
    if (
N1CFP && 2.95k
N1CFP->isZero()319
)
9917
2
      return N1;
9918
2.95k
9919
2.95k
    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
9920
2.95k
    
if (2.95k
N0.getOpcode() == ISD::FMUL2.95k
) {
9921
543
      // Fold scalars or any vector constants (not just splats).
9922
543
      // This fold is done in general by InstCombine, but extra fmul insts
9923
543
      // may have been generated during lowering.
9924
543
      SDValue N00 = N0.getOperand(0);
9925
543
      SDValue N01 = N0.getOperand(1);
9926
543
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
9927
543
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
9928
543
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
9929
543
9930
543
      // Check 1: Make sure that the first operand of the inner multiply is NOT
9931
543
      // a constant. Otherwise, we may induce infinite looping.
9932
543
      if (
!(isConstOrConstSplatFP(N00) || 543
(BV00 && 541
BV00->isConstant()22
))) {
9933
519
        // Check 2: Make sure that the second operand of the inner multiply and
9934
519
        // the second operand of the outer multiply are constants.
9935
519
        if (
(N1CFP && 519
isConstOrConstSplatFP(N01)58
) ||
9936
519
            
(BV1 && 505
BV0127
&&
BV1->isConstant()7
&&
BV01->isConstant()7
)) {
9937
21
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
9938
21
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
9939
21
        }
9940
2.93k
      }
9941
543
    }
9942
2.93k
9943
2.93k
    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
9944
2.93k
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
9945
2.93k
    // during an early run of DAGCombiner can prevent folding with fmuls
9946
2.93k
    // inserted during lowering.
9947
2.93k
    
if (2.93k
N0.getOpcode() == ISD::FADD &&
9948
269
        (N0.getOperand(0) == N0.getOperand(1)) &&
9949
2.93k
        
N0.hasOneUse()38
) {
9950
18
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
9951
18
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
9952
18
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
9953
18
    }
9954
123k
  }
9955
123k
9956
123k
  // fold (fmul X, 2.0) -> (fadd X, X)
9957
123k
  
if (123k
N1CFP && 123k
N1CFP->isExactlyValue(+2.0)33.8k
)
9958
527
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
9959
123k
9960
123k
  // fold (fmul X, -1.0) -> (fneg X)
9961
123k
  
if (123k
N1CFP && 123k
N1CFP->isExactlyValue(-1.0)33.3k
)
9962
41
    
if (41
!LegalOperations || 41
TLI.isOperationLegal(ISD::FNEG, VT)0
)
9963
41
      return DAG.getNode(ISD::FNEG, DL, VT, N0);
9964
123k
9965
123k
  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
9966
123k
  
if (char 123k
LHSNeg123k
= isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9967
1.04k
    if (char 
RHSNeg1.04k
= isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9968
223
      // Both can be negated for free, check to see if at least one is cheaper
9969
223
      // negated.
9970
223
      if (
LHSNeg == 2 || 223
RHSNeg == 2204
)
9971
19
        return DAG.getNode(ISD::FMUL, DL, VT,
9972
19
                           GetNegatedExpression(N0, DAG, LegalOperations),
9973
19
                           GetNegatedExpression(N1, DAG, LegalOperations),
9974
19
                           Flags);
9975
123k
    }
9976
1.04k
  }
9977
123k
9978
123k
  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
9979
123k
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
9980
123k
  
if (123k
Flags.hasNoNaNs() && 123k
Flags.hasNoSignedZeros()1.70k
&&
9981
1.54k
      
(N0.getOpcode() == ISD::SELECT || 1.54k
N1.getOpcode() == ISD::SELECT1.54k
) &&
9982
123k
      
TLI.isOperationLegal(ISD::FABS, VT)2
) {
9983
2
    SDValue Select = N0, X = N1;
9984
2
    if (Select.getOpcode() != ISD::SELECT)
9985
2
      std::swap(Select, X);
9986
2
9987
2
    SDValue Cond = Select.getOperand(0);
9988
2
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
9989
2
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
9990
2
9991
2
    if (
TrueOpnd && 2
FalseOpnd2
&&
9992
2
        
Cond.getOpcode() == ISD::SETCC2
&&
Cond.getOperand(0) == X2
&&
9993
2
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
9994
2
        
cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)2
) {
9995
2
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9996
2
      switch (CC) {
9997
0
      default: break;
9998
1
      case ISD::SETOLT:
9999
1
      case ISD::SETULT:
10000
1
      case ISD::SETOLE:
10001
1
      case ISD::SETULE:
10002
1
      case ISD::SETLT:
10003
1
      case ISD::SETLE:
10004
1
        std::swap(TrueOpnd, FalseOpnd);
10005
1
        // Fall through
10006
2
      case ISD::SETOGT:
10007
2
      case ISD::SETUGT:
10008
2
      case ISD::SETOGE:
10009
2
      case ISD::SETUGE:
10010
2
      case ISD::SETGT:
10011
2
      case ISD::SETGE:
10012
2
        if (
TrueOpnd->isExactlyValue(-1.0) && 2
FalseOpnd->isExactlyValue(1.0)1
&&
10013
1
            TLI.isOperationLegal(ISD::FNEG, VT))
10014
1
          return DAG.getNode(ISD::FNEG, DL, VT,
10015
1
                   DAG.getNode(ISD::FABS, DL, VT, X));
10016
1
        
if (1
TrueOpnd->isExactlyValue(1.0) && 1
FalseOpnd->isExactlyValue(-1.0)1
)
10017
1
          return DAG.getNode(ISD::FABS, DL, VT, X);
10018
0
10019
0
        break;
10020
2
      }
10021
2
    }
10022
2
  }
10023
123k
10024
123k
  // FMUL -> FMA combines:
10025
123k
  
if (SDValue 123k
Fused123k
= visitFMULForFMADistributiveCombine(N)) {
10026
142
    AddToWorklist(Fused.getNode());
10027
142
    return Fused;
10028
142
  }
10029
122k
10030
122k
  return SDValue();
10031
122k
}
10032
10033
6.34k
SDValue DAGCombiner::visitFMA(SDNode *N) {
10034
6.34k
  SDValue N0 = N->getOperand(0);
10035
6.34k
  SDValue N1 = N->getOperand(1);
10036
6.34k
  SDValue N2 = N->getOperand(2);
10037
6.34k
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10038
6.34k
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10039
6.34k
  EVT VT = N->getValueType(0);
10040
6.34k
  SDLoc DL(N);
10041
6.34k
  const TargetOptions &Options = DAG.getTarget().Options;
10042
6.34k
10043
6.34k
  // Constant fold FMA.
10044
6.34k
  if (isa<ConstantFPSDNode>(N0) &&
10045
58
      isa<ConstantFPSDNode>(N1) &&
10046
6.34k
      
isa<ConstantFPSDNode>(N2)18
) {
10047
0
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
10048
0
  }
10049
6.34k
10050
6.34k
  
if (6.34k
Options.UnsafeFPMath6.34k
) {
10051
1.90k
    if (
N0CFP && 1.90k
N0CFP->isZero()0
)
10052
0
      return N2;
10053
1.90k
    
if (1.90k
N1CFP && 1.90k
N1CFP->isZero()40
)
10054
0
      return N2;
10055
6.34k
  }
10056
6.34k
  // TODO: The FMA node should have flags that propagate to these nodes.
10057
6.34k
  
if (6.34k
N0CFP && 6.34k
N0CFP->isExactlyValue(1.0)58
)
10058
0
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
10059
6.34k
  
if (6.34k
N1CFP && 6.34k
N1CFP->isExactlyValue(1.0)317
)
10060
2
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
10061
6.34k
10062
6.34k
  // Canonicalize (fma c, x, y) -> (fma x, c, y)
10063
6.34k
  
if (6.34k
isConstantFPBuildVectorOrConstantFP(N0) &&
10064
75
     !isConstantFPBuildVectorOrConstantFP(N1))
10065
57
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
10066
6.28k
10067
6.28k
  // TODO: FMA nodes should have flags that propagate to the created nodes.
10068
6.28k
  // For now, create a Flags object for use with all unsafe math transforms.
10069
6.28k
  SDNodeFlags Flags;
10070
6.28k
  Flags.setUnsafeAlgebra(true);
10071
6.28k
10072
6.28k
  if (
Options.UnsafeFPMath6.28k
) {
10073
1.89k
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
10074
1.89k
    if (
N2.getOpcode() == ISD::FMUL && 1.89k
N0 == N2.getOperand(0)92
&&
10075
17
        isConstantFPBuildVectorOrConstantFP(N1) &&
10076
1.89k
        
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))17
) {
10077
17
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
10078
17
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
10079
17
                                     Flags), Flags);
10080
17
    }
10081
1.87k
10082
1.87k
    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
10083
1.87k
    
if (1.87k
N0.getOpcode() == ISD::FMUL &&
10084
58
        isConstantFPBuildVectorOrConstantFP(N1) &&
10085
1.87k
        
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))17
) {
10086
17
      return DAG.getNode(ISD::FMA, DL, VT,
10087
17
                         N0.getOperand(0),
10088
17
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
10089
17
                                     Flags),
10090
17
                         N2);
10091
17
    }
10092
6.25k
  }
10093
6.25k
10094
6.25k
  // (fma x, 1, y) -> (fadd x, y)
10095
6.25k
  // (fma x, -1, y) -> (fadd (fneg x), y)
10096
6.25k
  
if (6.25k
N1CFP6.25k
) {
10097
313
    if (N1CFP->isExactlyValue(1.0))
10098
313
      // TODO: The FMA node should have flags that propagate to this node.
10099
0
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
10100
313
10101
313
    
if (313
N1CFP->isExactlyValue(-1.0) &&
10102
313
        
(!LegalOperations || 1
TLI.isOperationLegal(ISD::FNEG, VT)0
)) {
10103
1
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
10104
1
      AddToWorklist(RHSNeg.getNode());
10105
1
      // TODO: The FMA node should have flags that propagate to this node.
10106
1
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
10107
1
    }
10108
6.25k
  }
10109
6.25k
10110
6.25k
  
if (6.25k
Options.UnsafeFPMath6.25k
) {
10111
1.86k
    // (fma x, c, x) -> (fmul x, (c+1))
10112
1.86k
    if (
N1CFP && 1.86k
N0 == N236
) {
10113
1
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
10114
1
                         DAG.getNode(ISD::FADD, DL, VT, N1,
10115
1
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
10116
1
                         Flags);
10117
1
    }
10118
1.86k
10119
1.86k
    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
10120
1.86k
    
if (1.86k
N1CFP && 1.86k
N2.getOpcode() == ISD::FNEG35
&&
N2.getOperand(0) == N01
) {
10121
1
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
10122
1
                         DAG.getNode(ISD::FADD, DL, VT, N1,
10123
1
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
10124
1
                         Flags);
10125
1
    }
10126
6.24k
  }
10127
6.24k
10128
6.24k
  return SDValue();
10129
6.24k
}
10130
10131
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
10132
// reciprocal.
10133
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
10134
// Notice that this is not always beneficial. One reason is different targets
10135
// may have different costs for FDIV and FMUL, so sometimes the cost of two
10136
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
10137
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
10138
121k
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
10139
121k
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
10140
121k
  const SDNodeFlags Flags = N->getFlags();
10141
121k
  if (
!UnsafeMath && 121k
!Flags.hasAllowReciprocal()120k
)
10142
120k
    return SDValue();
10143
593
10144
593
  // Skip if current node is a reciprocal.
10145
593
  SDValue N0 = N->getOperand(0);
10146
593
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10147
593
  if (
N0CFP && 593
N0CFP->isExactlyValue(1.0)105
)
10148
102
    return SDValue();
10149
491
10150
491
  // Exit early if the target does not want this transform or if there can't
10151
491
  // possibly be enough uses of the divisor to make the transform worthwhile.
10152
491
  SDValue N1 = N->getOperand(1);
10153
491
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
10154
491
  if (
!MinUses || 491
N1->use_size() < MinUses251
)
10155
470
    return SDValue();
10156
21
10157
21
  // Find all FDIV users of the same divisor.
10158
21
  // Use a set because duplicates may be present in the user list.
10159
21
  SetVector<SDNode *> Users;
10160
47
  for (auto *U : N1->uses()) {
10161
47
    if (
U->getOpcode() == ISD::FDIV && 47
U->getOperand(1) == N144
) {
10162
44
      // This division is eligible for optimization only if global unsafe math
10163
44
      // is enabled or if this division allows reciprocal formation.
10164
44
      if (
UnsafeMath || 44
U->getFlags().hasAllowReciprocal()23
)
10165
40
        Users.insert(U);
10166
44
    }
10167
47
  }
10168
21
10169
21
  // Now that we have the actual number of divisor uses, make sure it meets
10170
21
  // the minimum threshold specified by the target.
10171
21
  if (Users.size() < MinUses)
10172
8
    return SDValue();
10173
13
10174
13
  EVT VT = N->getValueType(0);
10175
13
  SDLoc DL(N);
10176
13
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10177
13
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10178
13
10179
13
  // Dividend / Divisor -> Dividend * Reciprocal
10180
31
  for (auto *U : Users) {
10181
31
    SDValue Dividend = U->getOperand(0);
10182
31
    if (
Dividend != FPOne31
) {
10183
30
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10184
30
                                    Reciprocal, Flags);
10185
30
      CombineTo(U, NewNode);
10186
31
    } else 
if (1
U != Reciprocal.getNode()1
) {
10187
0
      // In the absence of fast-math-flags, this user node is always the
10188
0
      // same node as Reciprocal, but with FMF they may be different nodes.
10189
0
      CombineTo(U, Reciprocal);
10190
0
    }
10191
31
  }
10192
121k
  return SDValue(N, 0);  // N was replaced.
10193
121k
}
10194
10195
121k
SDValue DAGCombiner::visitFDIV(SDNode *N) {
10196
121k
  SDValue N0 = N->getOperand(0);
10197
121k
  SDValue N1 = N->getOperand(1);
10198
121k
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10199
121k
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10200
121k
  EVT VT = N->getValueType(0);
10201
121k
  SDLoc DL(N);
10202
121k
  const TargetOptions &Options = DAG.getTarget().Options;
10203
121k
  SDNodeFlags Flags = N->getFlags();
10204
121k
10205
121k
  // fold vector ops
10206
121k
  if (VT.isVector())
10207
29.2k
    
if (SDValue 29.2k
FoldedVOp29.2k
= SimplifyVBinOp(N))
10208
0
      return FoldedVOp;
10209
121k
10210
121k
  // fold (fdiv c1, c2) -> c1/c2
10211
121k
  
if (121k
N0CFP && 121k
N1CFP30.8k
)
10212
3
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10213
121k
10214
121k
  
if (SDValue 121k
NewSel121k
= foldBinOpIntoSelect(N))
10215
2
    return NewSel;
10216
121k
10217
121k
  
if (121k
Options.UnsafeFPMath121k
) {
10218
601
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10219
601
    if (
N1CFP601
) {
10220
21
      // Compute the reciprocal 1.0 / c2.
10221
21
      const APFloat &N1APF = N1CFP->getValueAPF();
10222
21
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10223
21
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10224
21
      // Only do the transform if the reciprocal is a legal fp immediate that
10225
21
      // isn't too nasty (eg NaN, denormal, ...).
10226
21
      if (
(st == APFloat::opOK || 21
st == APFloat::opInexact12
) && // Not too nasty
10227
17
          (!LegalOperations ||
10228
17
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10229
17
           // backend)... we should handle this gracefully after Legalize.
10230
17
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
10231
0
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10232
0
           TLI.isFPImmLegal(Recip, VT)))
10233
17
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
10234
17
                           DAG.getConstantFP(Recip, DL, VT), Flags);
10235
584
    }
10236
584
10237
584
    // If this FDIV is part of a reciprocal square root, it may be folded
10238
584
    // into a target-specific square root estimate instruction.
10239
584
    
if (584
N1.getOpcode() == ISD::FSQRT584
) {
10240
84
      if (SDValue 
RV84
= buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10241
43
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10242
43
      }
10243
500
    } else 
if (500
N1.getOpcode() == ISD::FP_EXTEND &&
10244
500
               
N1.getOperand(0).getOpcode() == ISD::FSQRT2
) {
10245
2
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10246
2
                                          Flags)) {
10247
2
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10248
2
        AddToWorklist(RV.getNode());
10249
2
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10250
2
      }
10251
498
    } else 
if (498
N1.getOpcode() == ISD::FP_ROUND &&
10252
498
               
N1.getOperand(0).getOpcode() == ISD::FSQRT2
) {
10253
2
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10254
2
                                          Flags)) {
10255
2
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10256
2
        AddToWorklist(RV.getNode());
10257
2
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10258
2
      }
10259
496
    } else 
if (496
N1.getOpcode() == ISD::FMUL496
) {
10260
2
      // Look through an FMUL. Even though this won't remove the FDIV directly,
10261
2
      // it's still worthwhile to get rid of the FSQRT if possible.
10262
2
      SDValue SqrtOp;
10263
2
      SDValue OtherOp;
10264
2
      if (
N1.getOperand(0).getOpcode() == ISD::FSQRT2
) {
10265
2
        SqrtOp = N1.getOperand(0);
10266
2
        OtherOp = N1.getOperand(1);
10267
2
      } else 
if (0
N1.getOperand(1).getOpcode() == ISD::FSQRT0
) {
10268
0
        SqrtOp = N1.getOperand(1);
10269
0
        OtherOp = N1.getOperand(0);
10270
0
      }
10271
2
      if (
SqrtOp.getNode()2
) {
10272
2
        // We found a FSQRT, so try to make this fold:
10273
2
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10274
2
        if (SDValue 
RV2
= buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10275
2
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10276
2
          AddToWorklist(RV.getNode());
10277
2
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10278
2
        }
10279
535
      }
10280
500
    }
10281
535
10282
535
    // Fold into a reciprocal estimate and multiply instead of a real divide.
10283
535
    
if (SDValue 535
RV535
= BuildReciprocalEstimate(N1, Flags)) {
10284
229
      AddToWorklist(RV.getNode());
10285
229
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10286
229
    }
10287
121k
  }
10288
121k
10289
121k
  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10290
121k
  
if (char 121k
LHSNeg121k
= isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10291
5.94k
    if (char 
RHSNeg5.94k
= isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10292
87
      // Both can be negated for free, check to see if at least one is cheaper
10293
87
      // negated.
10294
87
      if (
LHSNeg == 2 || 87
RHSNeg == 286
)
10295
9
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10296
9
                           GetNegatedExpression(N0, DAG, LegalOperations),
10297
9
                           GetNegatedExpression(N1, DAG, LegalOperations),
10298
9
                           Flags);
10299
121k
    }
10300
5.94k
  }
10301
121k
10302
121k
  
if (SDValue 121k
CombineRepeatedDivisors121k
= combineRepeatedFPDivisors(N))
10303
13
    return CombineRepeatedDivisors;
10304
121k
10305
121k
  return SDValue();
10306
121k
}
10307
10308
264
SDValue DAGCombiner::visitFREM(SDNode *N) {
10309
264
  SDValue N0 = N->getOperand(0);
10310
264
  SDValue N1 = N->getOperand(1);
10311
264
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10312
264
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10313
264
  EVT VT = N->getValueType(0);
10314
264
10315
264
  // fold (frem c1, c2) -> fmod(c1,c2)
10316
264
  if (
N0CFP && 264
N1CFP13
)
10317
9
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10318
255
10319
255
  
if (SDValue 255
NewSel255
= foldBinOpIntoSelect(N))
10320
2
    return NewSel;
10321
253
10322
253
  return SDValue();
10323
253
}
10324
10325
2.78k
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10326
2.78k
  if (!DAG.getTarget().Options.UnsafeFPMath)
10327
2.60k
    return SDValue();
10328
179
10329
179
  SDValue N0 = N->getOperand(0);
10330
179
  if (TLI.isFsqrtCheap(N0, DAG))
10331
52
    return SDValue();
10332
127
10333
127
  // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10334
127
  // For now, create a Flags object for use with all unsafe math transforms.
10335
127
  SDNodeFlags Flags;
10336
127
  Flags.setUnsafeAlgebra(true);
10337
127
  return buildSqrtEstimate(N0, Flags);
10338
127
}
10339
10340
/// copysign(x, fp_extend(y)) -> copysign(x, y)
10341
/// copysign(x, fp_round(y)) -> copysign(x, y)
10342
3.38k
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10343
3.38k
  SDValue N1 = N->getOperand(1);
10344
3.38k
  if ((N1.getOpcode() == ISD::FP_EXTEND ||
10345
3.38k
       
N1.getOpcode() == ISD::FP_ROUND3.35k
)) {
10346
82
    // Do not optimize out type conversion of f128 type yet.
10347
82
    // For some targets like x86_64, configuration is changed to keep one f128
10348
82
    // value in one SSE register, but instruction selection cannot handle
10349
82
    // FCOPYSIGN on SSE registers yet.
10350
82
    EVT N1VT = N1->getValueType(0);
10351
82
    EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
10352
82
    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10353
82
  }
10354
3.30k
  return false;
10355
3.30k
}
10356
10357
3.42k
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10358
3.42k
  SDValue N0 = N->getOperand(0);
10359
3.42k
  SDValue N1 = N->getOperand(1);
10360
3.42k
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10361
3.42k
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10362
3.42k
  EVT VT = N->getValueType(0);
10363
3.42k
10364
3.42k
  if (
N0CFP && 3.42k
N1CFP1.07k
) // Constant fold
10365
0
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10366
3.42k
10367
3.42k
  
if (3.42k
N1CFP3.42k
) {
10368
24
    const APFloat &V = N1CFP->getValueAPF();
10369
24
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10370
24
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10371
24
    if (
!V.isNegative()24
) {
10372
24
      if (
!LegalOperations || 24
TLI.isOperationLegal(ISD::FABS, VT)0
)
10373
24
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10374
0
    } else {
10375
0
      if (
!LegalOperations || 0
TLI.isOperationLegal(ISD::FNEG, VT)0
)
10376
0
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10377
0
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10378
3.39k
    }
10379
24
  }
10380
3.39k
10381
3.39k
  // copysign(fabs(x), y) -> copysign(x, y)
10382
3.39k
  // copysign(fneg(x), y) -> copysign(x, y)
10383
3.39k
  // copysign(copysign(x,z), y) -> copysign(x, y)
10384
3.39k
  
if (3.39k
N0.getOpcode() == ISD::FABS || 3.39k
N0.getOpcode() == ISD::FNEG3.39k
||
10385
3.39k
      N0.getOpcode() == ISD::FCOPYSIGN)
10386
6
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10387
3.39k
10388
3.39k
  // copysign(x, abs(y)) -> abs(x)
10389
3.39k
  
  if (N1.getOpcode() == ISD::FABS)
10390
2
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10391
3.38k
10392
3.38k
  // copysign(x, copysign(y,z)) -> copysign(x, z)
10393
3.38k
  
  if (N1.getOpcode() == ISD::FCOPYSIGN)
10394
2
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10395
3.38k
10396
3.38k
  // copysign(x, fp_extend(y)) -> copysign(x, y)
10397
3.38k
  // copysign(x, fp_round(y)) -> copysign(x, y)
10398
3.38k
  
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10399
71
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10400
3.31k
10401
3.31k
  return SDValue();
10402
3.31k
}
10403
10404
127k
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10405
127k
  SDValue N0 = N->getOperand(0);
10406
127k
  EVT VT = N->getValueType(0);
10407
127k
  EVT OpVT = N0.getValueType();
10408
127k
10409
127k
  // fold (sint_to_fp c1) -> c1fp
10410
127k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10411
127k
      // ...but only if the target supports immediate floating-point values
10412
5
      (!LegalOperations ||
10413
3
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10414
4
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10415
127k
10416
127k
  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10417
127k
  // but UINT_TO_FP is legal on this target, try to convert.
10418
127k
  
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10419
127k
      
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10420
104
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10421
104
    if (DAG.SignBitIsZero(N0))
10422
0
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10423
127k
  }
10424
127k
10425
127k
  // The next optimizations are desirable only if SELECT_CC can be lowered.
10426
127k
  
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10427
89.0k
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
10428
89.0k
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10429
4
        !VT.isVector() &&
10430
4
        (!LegalOperations ||
10431
89.0k
         
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)0
)) {
10432
4
      SDLoc DL(N);
10433
4
      SDValue Ops[] =
10434
4
        { N0.getOperand(0), N0.getOperand(1),
10435
4
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10436
4
          N0.getOperand(2) };
10437
4
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10438
4
    }
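    // Clarifying note (assumed, not in the original source): an i1 'true'
    // sign-extends to -1, which is why sint_to_fp of an i1 setcc maps to the
    // -1.0/0.0 constants above, while the zero-extended variant handled next
    // maps to 1.0/0.0.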
10439
89.0k
10440
89.0k
    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10441
89.0k
    //      (select_cc x, y, 1.0, 0.0, cc)
10442
89.0k
    
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10443
89.0k
        
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
10444
22
        (!LegalOperations ||
10445
89.0k
         
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10446
22
      SDLoc DL(N);
10447
22
      SDValue Ops[] =
10448
22
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10449
22
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10450
22
          N0.getOperand(0).getOperand(2) };
10451
22
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10452
22
    }
10453
127k
  }
10454
127k
10455
127k
  return SDValue();
10456
127k
}
10457
10458
92.9k
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10459
92.9k
  SDValue N0 = N->getOperand(0);
10460
92.9k
  EVT VT = N->getValueType(0);
10461
92.9k
  EVT OpVT = N0.getValueType();
10462
92.9k
10463
92.9k
  // fold (uint_to_fp c1) -> c1fp
10464
92.9k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10465
92.9k
      // ...but only if the target supports immediate floating-point values
10466
0
      (!LegalOperations ||
10467
0
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10468
0
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10469
92.9k
10470
92.9k
  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10471
92.9k
  // but SINT_TO_FP is legal on this target, try to convert.
10472
92.9k
  
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10473
92.9k
      
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10474
154
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10475
154
    if (DAG.SignBitIsZero(N0))
10476
20
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10477
92.9k
  }
10478
92.9k
10479
92.9k
  // The next optimizations are desirable only if SELECT_CC can be lowered.
10480
92.9k
  
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10481
71.3k
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
10482
71.3k
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10483
445
        (!LegalOperations ||
10484
71.3k
         
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10485
445
      SDLoc DL(N);
10486
445
      SDValue Ops[] =
10487
445
        { N0.getOperand(0), N0.getOperand(1),
10488
445
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10489
445
          N0.getOperand(2) };
10490
445
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10491
445
    }
10492
92.5k
  }
10493
92.5k
10494
92.5k
  return SDValue();
10495
92.5k
}
10496
10497
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
10498
29.2k
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10499
29.2k
  SDValue N0 = N->getOperand(0);
10500
29.2k
  EVT VT = N->getValueType(0);
10501
29.2k
10502
29.2k
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10503
29.2k
    return SDValue();
10504
35
10505
35
  SDValue Src = N0.getOperand(0);
10506
35
  EVT SrcVT = Src.getValueType();
10507
35
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10508
35
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10509
35
10510
35
  // We can safely assume the conversion won't overflow the output range,
10511
35
  // because (for example) (uint8_t)18293.f is undefined behavior.
10512
35
10513
35
  // Since we can assume the conversion won't overflow, our decision as to
10514
35
  // whether the input will fit in the float should depend on the minimum
10515
35
  // of the input range and output range.
10516
35
10517
35
  // This means this is also safe for a signed input and unsigned output, since
10518
35
  // a negative input would lead to undefined behavior.
10519
35
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10520
35
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10521
35
  unsigned ActualSize = std::min(InputSize, OutputSize);
10522
35
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10523
35
10524
35
  // We can only fold away the float conversion if the input range can be
10525
35
  // represented exactly in the float range.
10526
35
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10527
5
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10528
3
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10529
2
                                                       : ISD::ZERO_EXTEND;
10530
3
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10531
3
    }
10532
2
    
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10533
1
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10534
1
    return DAG.getBitcast(VT, Src);
10535
1
  }
10536
30
  return SDValue();
10537
30
}
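// Worked example (illustrative, not part of the original source): for
// (fp_to_sint (sint_to_fp i16 x)) through f32, InputSize = 15 and
// OutputSize = 31, so ActualSize = 15 fits in f32's 24-bit significand and
// the pair folds to a sign extension of x; an i32 round trip through f32 is
// left alone because 31 > 24.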
10538
10539
12.2k
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10540
12.2k
  SDValue N0 = N->getOperand(0);
10541
12.2k
  EVT VT = N->getValueType(0);
10542
12.2k
10543
12.2k
  // fold (fp_to_sint c1fp) -> c1
10544
12.2k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10545
5
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10546
12.2k
10547
12.2k
  return FoldIntToFPToInt(N, DAG);
10548
12.2k
}
10549
10550
17.0k
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10551
17.0k
  SDValue N0 = N->getOperand(0);
10552
17.0k
  EVT VT = N->getValueType(0);
10553
17.0k
10554
17.0k
  // fold (fp_to_uint c1fp) -> c1
10555
17.0k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10556
7
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10557
17.0k
10558
17.0k
  return FoldIntToFPToInt(N, DAG);
10559
17.0k
}
10560
10561
23.5k
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10562
23.5k
  SDValue N0 = N->getOperand(0);
10563
23.5k
  SDValue N1 = N->getOperand(1);
10564
23.5k
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10565
23.5k
  EVT VT = N->getValueType(0);
10566
23.5k
10567
23.5k
  // fold (fp_round c1fp) -> c1fp
10568
23.5k
  if (N0CFP)
10569
2
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10570
23.5k
10571
23.5k
  // fold (fp_round (fp_extend x)) -> x
10572
23.5k
  
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10573
203
    return N0.getOperand(0);
10574
23.3k
10575
23.3k
  // fold (fp_round (fp_round x)) -> (fp_round x)
10576
23.3k
  
  if (N0.getOpcode() == ISD::FP_ROUND) {
10577
19
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10578
19
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10579
19
10580
19
    // Skip this folding if it results in an fp_round from f80 to f16.
10581
19
    //
10582
19
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
10583
19
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
10584
19
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
10585
19
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10586
19
    // x86.
10587
19
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10588
1
      return SDValue();
10589
18
10590
18
    // If the first fp_round isn't a value preserving truncation, it might
10591
18
    // introduce a tie in the second fp_round, that wouldn't occur in the
10592
18
    // single-step fp_round we want to fold to.
10593
18
    // In other words, double rounding isn't the same as rounding.
10594
18
    // Also, this is a value preserving truncation iff both fp_round's are.
10595
18
    
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10596
7
      SDLoc DL(N);
10597
7
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10598
0
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10599
7
    }
10600
23.3k
  }
10601
23.3k
10602
23.3k
  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10603
23.3k
  
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10604
8
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10605
8
                              N0.getOperand(0), N1);
10606
8
    AddToWorklist(Tmp.getNode());
10607
8
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10608
8
                       Tmp, N0.getOperand(1));
10609
8
  }
10610
23.3k
10611
23.3k
  
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10612
4
    return NewVSel;
10613
23.3k
10614
23.3k
  return SDValue();
10615
23.3k
}
10616
10617
0
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10618
0
  SDValue N0 = N->getOperand(0);
10619
0
  EVT VT = N->getValueType(0);
10620
0
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10621
0
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10622
0
10623
0
  // fold (fp_round_inreg c1fp) -> c1fp
10624
0
  if (N0CFP && isTypeLegal(EVT)) {
10625
0
    SDLoc DL(N);
10626
0
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10627
0
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10628
0
  }
10629
0
10630
0
  return SDValue();
10631
0
}
10632
10633
68.2k
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10634
68.2k
  SDValue N0 = N->getOperand(0);
10635
68.2k
  EVT VT = N->getValueType(0);
10636
68.2k
10637
68.2k
  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10638
68.2k
  if (N->hasOneUse() &&
10639
65.6k
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
10640
32
    return SDValue();
10641
68.1k
10642
68.1k
  // fold (fp_extend c1fp) -> c1fp
10643
68.1k
  
  if (isConstantFPBuildVectorOrConstantFP(N0))
10644
10
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10645
68.1k
10646
68.1k
  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10647
68.1k
  
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
10648
692
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10649
4
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10650
68.1k
10651
68.1k
  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10652
68.1k
  // value of X.
10653
68.1k
  
  if (N0.getOpcode() == ISD::FP_ROUND
10654
68.1k
      && N0.getConstantOperandVal(1) == 1) {
10655
126
    SDValue In = N0.getOperand(0);
10656
126
    if (In.getValueType() == VT) return In;
10657
6
    
    if (VT.bitsLT(In.getValueType()))
10658
5
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10659
5
                         In, N0.getOperand(1));
10660
1
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10661
1
  }
10662
68.0k
10663
68.0k
  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10664
68.0k
  
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10665
68.0k
       
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10666
382
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10667
382
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10668
382
                                     LN0->getChain(),
10669
382
                                     LN0->getBasePtr(), N0.getValueType(),
10670
382
                                     LN0->getMemOperand());
10671
382
    CombineTo(N, ExtLoad);
10672
382
    CombineTo(N0.getNode(),
10673
382
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10674
382
                          N0.getValueType(), ExtLoad,
10675
382
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
10676
382
              ExtLoad.getValue(1));
10677
382
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10678
382
  }
10679
67.6k
10680
67.6k
  
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10681
2
    return NewVSel;
10682
67.6k
10683
67.6k
  return SDValue();
10684
67.6k
}
10685
10686
970
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10687
970
  SDValue N0 = N->getOperand(0);
10688
970
  EVT VT = N->getValueType(0);
10689
970
10690
970
  // fold (fceil c1) -> fceil(c1)
10691
970
  if (isConstantFPBuildVectorOrConstantFP(N0))
10692
0
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10693
970
10694
970
  return SDValue();
10695
970
}
10696
10697
1.13k
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10698
1.13k
  SDValue N0 = N->getOperand(0);
10699
1.13k
  EVT VT = N->getValueType(0);
10700
1.13k
10701
1.13k
  // fold (ftrunc c1) -> ftrunc(c1)
10702
1.13k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10703
0
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10704
1.13k
10705
1.13k
  return SDValue();
10706
1.13k
}
10707
10708
1.53k
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10709
1.53k
  SDValue N0 = N->getOperand(0);
10710
1.53k
  EVT VT = N->getValueType(0);
10711
1.53k
10712
1.53k
  // fold (ffloor c1) -> ffloor(c1)
10713
1.53k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10714
0
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10715
1.53k
10716
1.53k
  return SDValue();
10717
1.53k
}
10718
10719
// FIXME: FNEG and FABS have a lot in common; refactor.
10720
12.7k
SDValue DAGCombiner::visitFNEG(SDNode *N) {
10721
12.7k
  SDValue N0 = N->getOperand(0);
10722
12.7k
  EVT VT = N->getValueType(0);
10723
12.7k
10724
12.7k
  // Constant fold FNEG.
10725
12.7k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10726
0
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10727
12.7k
10728
12.7k
  
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10729
12.7k
                         &DAG.getTarget().Options))
10730
62
    return GetNegatedExpression(N0, DAG, LegalOperations);
10731
12.6k
10732
12.6k
  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10733
12.6k
  // constant pool values.
10734
12.6k
  
  if (!TLI.isFNegFree(VT) &&
10735
9.98k
      N0.getOpcode() == ISD::BITCAST &&
10736
12.6k
      
      N0.getNode()->hasOneUse()) {
10737
237
    SDValue Int = N0.getOperand(0);
10738
237
    EVT IntVT = Int.getValueType();
10739
237
    if (IntVT.isInteger() && !IntVT.isVector()) {
10740
30
      APInt SignMask;
10741
30
      if (N0.getValueType().isVector()) {
10742
23
        // For a vector, get a mask such as 0x80... per scalar element
10743
23
        // and splat it.
10744
23
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
10745
23
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10746
30
      } else {
10747
7
        // For a scalar, just generate 0x80...
10748
7
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
10749
7
      }
10750
30
      SDLoc DL0(N0);
10751
30
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10752
30
                        DAG.getConstant(SignMask, DL0, IntVT));
10753
30
      AddToWorklist(Int.getNode());
10754
30
      return DAG.getBitcast(VT, Int);
10755
30
    }
10756
12.6k
  }
10757
12.6k
10758
12.6k
  // (fneg (fmul c, x)) -> (fmul -c, x)
10759
12.6k
  
  if (N0.getOpcode() == ISD::FMUL &&
10760
12.6k
      
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10761
515
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10762
515
    if (CFP1) {
10763
18
      APFloat CVal = CFP1->getValueAPF();
10764
18
      CVal.changeSign();
10765
18
      if (Level >= AfterLegalizeDAG &&
10766
8
          (TLI.isFPImmLegal(CVal, VT) ||
10767
5
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
10768
3
        return DAG.getNode(
10769
3
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10770
3
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
10771
3
            N0->getFlags());
10772
12.6k
    }
10773
515
  }
10774
12.6k
10775
12.6k
  return SDValue();
10776
12.6k
}
10777
10778
2.84k
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10779
2.84k
  SDValue N0 = N->getOperand(0);
10780
2.84k
  SDValue N1 = N->getOperand(1);
10781
2.84k
  EVT VT = N->getValueType(0);
10782
2.84k
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10783
2.84k
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10784
2.84k
10785
2.84k
  if (N0CFP && N1CFP) {
10786
3
    const APFloat &C0 = N0CFP->getValueAPF();
10787
3
    const APFloat &C1 = N1CFP->getValueAPF();
10788
3
    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10789
3
  }
10790
2.84k
10791
2.84k
  // Canonicalize to constant on RHS.
10792
2.84k
  
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
10793
8
     !isConstantFPBuildVectorOrConstantFP(N1))
10794
2
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10795
2.83k
10796
2.83k
  return SDValue();
10797
2.83k
}
10798
10799
2.59k
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10800
2.59k
  SDValue N0 = N->getOperand(0);
10801
2.59k
  SDValue N1 = N->getOperand(1);
10802
2.59k
  EVT VT = N->getValueType(0);
10803
2.59k
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10804
2.59k
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10805
2.59k
10806
2.59k
  if (N0CFP && N1CFP) {
10807
3
    const APFloat &C0 = N0CFP->getValueAPF();
10808
3
    const APFloat &C1 = N1CFP->getValueAPF();
10809
3
    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10810
3
  }
10811
2.59k
10812
2.59k
  // Canonicalize to constant on RHS.
10813
2.59k
  
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
10814
8
     !isConstantFPBuildVectorOrConstantFP(N1))
10815
2
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10816
2.59k
10817
2.59k
  return SDValue();
10818
2.59k
}
10819
10820
8.32k
SDValue DAGCombiner::visitFABS(SDNode *N) {
10821
8.32k
  SDValue N0 = N->getOperand(0);
10822
8.32k
  EVT VT = N->getValueType(0);
10823
8.32k
10824
8.32k
  // fold (fabs c1) -> fabs(c1)
10825
8.32k
  if (isConstantFPBuildVectorOrConstantFP(N0))
10826
0
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10827
8.32k
10828
8.32k
  // fold (fabs (fabs x)) -> (fabs x)
10829
8.32k
  
  if (N0.getOpcode() == ISD::FABS)
10830
0
    return N->getOperand(0);
10831
8.32k
10832
8.32k
  // fold (fabs (fneg x)) -> (fabs x)
10833
8.32k
  // fold (fabs (fcopysign x, y)) -> (fabs x)
10834
8.32k
  
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10835
0
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10836
8.32k
10837
8.32k
  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10838
8.32k
  // constant pool values.
10839
8.32k
  
  if (!TLI.isFAbsFree(VT) &&
10840
6.67k
      N0.getOpcode() == ISD::BITCAST &&
10841
8.32k
      
      N0.getNode()->hasOneUse()) {
10842
295
    SDValue Int = N0.getOperand(0);
10843
295
    EVT IntVT = Int.getValueType();
10844
295
    if (IntVT.isInteger() && !IntVT.isVector()) {
10845
262
      APInt SignMask;
10846
262
      if (N0.getValueType().isVector()) {
10847
36
        // For a vector, get a mask such as 0x7f... per scalar element
10848
36
        // and splat it.
10849
36
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
10850
36
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10851
262
      } else {
10852
226
        // For a scalar, just generate 0x7f...
10853
226
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
10854
226
      }
10855
262
      SDLoc DL(N0);
10856
262
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10857
262
                        DAG.getConstant(SignMask, DL, IntVT));
10858
262
      AddToWorklist(Int.getNode());
10859
262
      return DAG.getBitcast(N->getValueType(0), Int);
10860
262
    }
10861
8.06k
  }
10862
8.06k
10863
8.06k
  return SDValue();
10864
8.06k
}
10865
10866
2.86M
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10867
2.86M
  SDValue Chain = N->getOperand(0);
10868
2.86M
  SDValue N1 = N->getOperand(1);
10869
2.86M
  SDValue N2 = N->getOperand(2);
10870
2.86M
10871
2.86M
  // If N is a constant we could fold this into a fallthrough or unconditional
10872
2.86M
  // branch. However that doesn't happen very often in normal code, because
10873
2.86M
  // Instcombine/SimplifyCFG should have handled the available opportunities.
10874
2.86M
  // If we did this folding here, it would be necessary to update the
10875
2.86M
  // MachineBasicBlock CFG, which is awkward.
10876
2.86M
10877
2.86M
  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10878
2.86M
  // on the target.
10879
2.86M
  if (N1.getOpcode() == ISD::SETCC &&
10880
1.98M
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
10881
2.86M
                                   N1.getOperand(0).getValueType())) {
10882
1.70M
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10883
1.70M
                       Chain, N1.getOperand(2),
10884
1.70M
                       N1.getOperand(0), N1.getOperand(1), N2);
10885
1.70M
  }
10886
1.15M
10887
1.15M
  
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10888
1.15M
      
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10889
24.9k
       (N1.getOperand(0).hasOneUse() &&
10890
1.15M
        
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
10891
392
    SDNode *Trunc = nullptr;
10892
392
    if (N1.getOpcode() == ISD::TRUNCATE) {
10893
54
      // Look past the truncate.
10894
54
      Trunc = N1.getNode();
10895
54
      N1 = N1.getOperand(0);
10896
54
    }
10897
392
10898
392
    // Match this pattern so that we can generate simpler code:
10899
392
    //
10900
392
    //   %a = ...
10901
392
    //   %b = and i32 %a, 2
10902
392
    //   %c = srl i32 %b, 1
10903
392
    //   brcond i32 %c ...
10904
392
    //
10905
392
    // into
10906
392
    //
10907
392
    //   %a = ...
10908
392
    //   %b = and i32 %a, 2
10909
392
    //   %c = setcc eq %b, 0
10910
392
    //   brcond %c ...
10911
392
    //
10912
392
    // This applies only when the AND constant value has one bit set and the
10913
392
    // SRL constant is equal to the log2 of the AND constant. The back-end is
10914
392
    // smart enough to convert the result into a TEST/JMP sequence.
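    // For instance (illustrative, not from the original source): in the IR
    // above AndConst is 2, which is a power of two, and the SRL amount 1
    // equals log2(2), so the brcond collapses to a setcc-against-zero test.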
10915
392
    SDValue Op0 = N1.getOperand(0);
10916
392
    SDValue Op1 = N1.getOperand(1);
10917
392
10918
392
    if (Op0.getOpcode() == ISD::AND &&
10919
392
        
        Op1.getOpcode() == ISD::Constant) {
10920
345
      SDValue AndOp1 = Op0.getOperand(1);
10921
345
10922
345
      if (AndOp1.getOpcode() == ISD::Constant) {
10923
345
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10924
345
10925
345
        if (AndConst.isPowerOf2() &&
10926
345
            
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10927
345
          SDLoc DL(N);
10928
345
          SDValue SetCC =
10929
345
            DAG.getSetCC(DL,
10930
345
                         getSetCCResultType(Op0.getValueType()),
10931
345
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10932
345
                         ISD::SETNE);
10933
345
10934
345
          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10935
345
                                          MVT::Other, Chain, SetCC, N2);
10936
345
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
10937
345
          // will convert it back to (X & C1) >> C2.
10938
345
          CombineTo(N, NewBRCond, false);
10939
345
          // Truncate is dead.
10940
345
          if (Trunc)
10941
29
            deleteAndRecombine(Trunc);
10942
345
          // Replace the uses of SRL with SETCC
10943
345
          WorklistRemover DeadNodes(*this);
10944
345
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10945
345
          deleteAndRecombine(N1.getNode());
10946
345
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10947
345
        }
10948
47
      }
10949
345
    }
10950
47
10951
47
    
    if (Trunc)
10952
47
      // Restore N1 if the above transformation doesn't match.
10953
25
      N1 = N->getOperand(1);
10954
392
  }
10955
1.15M
10956
1.15M
  // Transform br(xor(x, y)) -> br(x != y)
10957
1.15M
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
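  // Clarifying note (assumed, not in the original source): brcond branches on
  // a non-zero condition, and xor(x, y) is non-zero exactly when x != y; the
  // extra xor with 1 flips the i1 result, which is why the Equal flag below
  // selects SETEQ instead of SETNE.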
10958
1.15M
  
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10959
805k
    SDNode *TheXor = N1.getNode();
10960
805k
    SDValue Op0 = TheXor->getOperand(0);
10961
805k
    SDValue Op1 = TheXor->getOperand(1);
10962
805k
    if (Op0.getOpcode() == Op1.getOpcode()) {
10963
152
      // Avoid missing important xor optimizations.
10964
152
      if (SDValue Tmp = visitXOR(TheXor)) {
10965
1
        if (Tmp.getNode() != TheXor) {
10966
0
          DEBUG(dbgs() << "\nReplacing.8 ";
10967
0
                TheXor->dump(&DAG);
10968
0
                dbgs() << "\nWith: ";
10969
0
                Tmp.getNode()->dump(&DAG);
10970
0
                dbgs() << '\n');
10971
0
          WorklistRemover DeadNodes(*this);
10972
0
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10973
0
          deleteAndRecombine(TheXor);
10974
0
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
10975
0
                             MVT::Other, Chain, Tmp, N2);
10976
0
        }
10977
1
10978
1
        // visitXOR has changed XOR's operands or replaced the XOR completely,
10979
1
        // bail out.
10980
1
        return SDValue(N, 0);
10981
1
      }
10982
152
    }
10983
805k
10984
805k
    
    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
) {
10985
41.0k
      bool Equal = false;
10986
41.0k
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
10987
41.0k
          
          Op0.getOpcode() == ISD::XOR) {
10988
0
        TheXor = Op0.getNode();
10989
0
        Equal = true;
10990
0
      }
10991
41.0k
10992
41.0k
      EVT SetCCVT = N1.getValueType();
10993
41.0k
      if (LegalTypes)
10994
27
        SetCCVT = getSetCCResultType(SetCCVT);
10995
41.0k
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10996
41.0k
                                   SetCCVT,
10997
41.0k
                                   Op0, Op1,
10998
41.0k
                                   Equal ? ISD::SETEQ : ISD::SETNE);
10999
41.0k
      // Replace the uses of XOR with SETCC
11000
41.0k
      WorklistRemover DeadNodes(*this);
11001
41.0k
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
11002
41.0k
      deleteAndRecombine(N1.getNode());
11003
41.0k
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
11004
41.0k
                         MVT::Other, Chain, SetCC, N2);
11005
41.0k
    }
11006
1.11M
  }
11007
1.11M
11008
1.11M
  return SDValue();
11009
1.11M
}
11010
11011
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
11012
//
11013
2.09M
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
11014
2.09M
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
11015
2.09M
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
11016
2.09M
11017
2.09M
  // If N is a constant we could fold this into a fallthrough or unconditional
11018
2.09M
  // branch. However that doesn't happen very often in normal code, because
11019
2.09M
  // Instcombine/SimplifyCFG should have handled the available opportunities.
11020
2.09M
  // If we did this folding here, it would be necessary to update the
11021
2.09M
  // MachineBasicBlock CFG, which is awkward.
11022
2.09M
11023
2.09M
  // Use SimplifySetCC to simplify SETCC's.
11024
2.09M
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
11025
2.09M
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
11026
2.09M
                               false);
11027
2.09M
  if (Simp.getNode()) AddToWorklist(Simp.getNode());
11028
2.09M
11029
2.09M
  // fold to a simpler setcc
11030
2.09M
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11031
171k
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11032
171k
                       N->getOperand(0), Simp.getOperand(2),
11033
171k
                       Simp.getOperand(0), Simp.getOperand(1),
11034
171k
                       N->getOperand(4));
11035
1.92M
11036
1.92M
  return SDValue();
11037
1.92M
}
11038
11039
/// Return true if 'Use' is a load or a store that uses N as its base pointer
11040
/// and that N may be folded in the load / store addressing mode.
11041
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11042
                                    SelectionDAG &DAG,
11043
940k
                                    const TargetLowering &TLI) {
11044
940k
  EVT VT;
11045
940k
  unsigned AS;
11046
940k
11047
940k
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
11048
306k
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11049
4
      return false;
11050
306k
    VT = LD->getMemoryVT();
11051
306k
    AS = LD->getAddressSpace();
11052
940k
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
11053
436k
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11054
2.30k
      return false;
11055
433k
    VT = ST->getMemoryVT();
11056
433k
    AS = ST->getAddressSpace();
11057
433k
  } else
11058
198k
    return false;
11059
739k
11060
739k
  TargetLowering::AddrMode AM;
11061
739k
  if (N->getOpcode() == ISD::ADD) {
11062
739k
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11063
739k
    if (Offset)
11064
739k
      // [reg +/- imm]
11065
739k
      AM.BaseOffs = Offset->getSExtValue();
11066
739k
    else
11067
739k
      // [reg +/- reg]
11068
646
      AM.Scale = 1;
11069
0
  } else if (N->getOpcode() == ISD::SUB) {
11070
0
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11071
0
    if (Offset)
11072
0
      // [reg +/- imm]
11073
0
      AM.BaseOffs = -Offset->getSExtValue();
11074
0
    else
11075
0
      // [reg +/- reg]
11076
0
      AM.Scale = 1;
11077
0
  } else
11078
0
    return false;
11079
739k
11080
739k
  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11081
739k
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
11082
739k
}
11083
11084
/// Try turning a load/store into a pre-indexed load/store when the base
11085
/// pointer is an add or subtract and it has other uses besides the load/store.
11086
/// After the transformation, the new indexed load/store has effectively folded
11087
/// the add/subtract in and all of its other uses are redirected to the
11088
/// new load/store.
11089
14.0M
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
11090
14.0M
  if (Level < AfterLegalizeDAG)
11091
9.85M
    return false;
11092
4.23M
11093
4.23M
  bool isLoad = true;
11094
4.23M
  SDValue Ptr;
11095
4.23M
  EVT VT;
11096
4.23M
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11097
2.29M
    if (LD->isIndexed())
11098
10.3k
      return false;
11099
2.28M
    VT = LD->getMemoryVT();
11100
2.28M
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
11101
265k
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
11102
265k
      return false;
11103
2.02M
    Ptr = LD->getBasePtr();
11104
4.23M
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11105
1.94M
    if (ST->isIndexed())
11106
12.8k
      return false;
11107
1.92M
    VT = ST->getMemoryVT();
11108
1.92M
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
11109
317k
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
11110
317k
      return false;
11111
1.61M
    Ptr = ST->getBasePtr();
11112
1.61M
    isLoad = false;
11113
1.94M
  } else {
11114
0
    return false;
11115
0
  }
11116
3.63M
11117
3.63M
  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
11118
3.63M
  // out.  There is no reason to make this a preinc/predec.
11119
3.63M
  
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
11120
2.63M
      Ptr.getNode()->hasOneUse())
11121
3.28M
    return false;
11122
346k
11123
346k
  // Ask the target to do addressing mode selection.
11124
346k
  SDValue BasePtr;
11125
346k
  SDValue Offset;
11126
346k
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
11127
346k
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
11128
109k
    return false;
11129
237k
11130
237k
  // Backends without true r+i pre-indexed forms may need to pass a
11131
237k
  // constant base with a variable offset so that constant coercion
11132
237k
  // will work with the patterns in canonical form.
11133
237k
  bool Swapped = false;
11134
237k
  if (isa<ConstantSDNode>(BasePtr)) {
11135
1
    std::swap(BasePtr, Offset);
11136
1
    Swapped = true;
11137
1
  }
11138
237k
11139
237k
  // Don't create an indexed load / store with zero offset.
11140
237k
  if (isNullConstant(Offset))
11141
0
    return false;
11142
237k
11143
237k
  // Try turning it into a pre-indexed load / store except when:
11144
237k
  // 1) The new base ptr is a frame index.
11145
237k
  // 2) If N is a store and the new base ptr is either the same as or is a
11146
237k
  //    predecessor of the value being stored.
11147
237k
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
11148
237k
  //    that would create a cycle.
11149
237k
  // 4) All uses are load / store ops that use it as old base ptr.
11150
237k
11151
237k
  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
11152
237k
  // (plus the implicit offset) to a register to preinc anyway.
11153
237k
  
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11154
7.32k
    return false;
11155
230k
11156
230k
  // Check #2.
11157
230k
  
  if (!isLoad) {
11158
90.4k
    SDValue Val = cast<StoreSDNode>(N)->getValue();
11159
90.4k
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
11160
63.5k
      return false;
11161
166k
  }
11162
166k
11163
166k
  // Caches for hasPredecessorHelper.
11164
166k
  SmallPtrSet<const SDNode *, 32> Visited;
11165
166k
  SmallVector<const SDNode *, 16> Worklist;
11166
166k
  Worklist.push_back(N);
11167
166k
11168
166k
  // If the offset is a constant, there may be other adds of constants that
11169
166k
  // can be folded with this one. We should do this to avoid having to keep
11170
166k
  // a copy of the original base pointer.
11171
166k
  SmallVector<SDNode *, 16> OtherUses;
11172
166k
  if (isa<ConstantSDNode>(Offset))
11173
166k
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
11174
166k
                              UE = BasePtr.getNode()->use_end();
11175
642k
         
UI != UE642k
;
++UI476k
) {
11176
520k
      SDUse &Use = UI.getUse();
11177
520k
      // Skip the use that is Ptr and uses of other results from BasePtr's
11178
520k
      // node (important for nodes that return multiple results).
11179
520k
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
11180
152k
        continue;
11181
368k
11182
368k
      
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
11183
110k
        continue;
11184
257k
11185
257k
      
      if (Use.getUser()->getOpcode() != ISD::ADD &&
11186
257k
          
          Use.getUser()->getOpcode() != ISD::SUB) {
11187
43.9k
        OtherUses.clear();
11188
43.9k
        break;
11189
43.9k
      }
11190
213k
11191
213k
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
11192
213k
      if (!isa<ConstantSDNode>(Op1)) {
11193
251
        OtherUses.clear();
11194
251
        break;
11195
251
      }
11196
213k
11197
213k
      // FIXME: In some cases, we can be smarter about this.
11198
213k
      
      if (Op1.getValueType() != Offset.getValueType()) {
) {
11199
0
        OtherUses.clear();
11200
0
        break;
11201
0
      }
11202
213k
11203
213k
      OtherUses.push_back(Use.getUser());
11204
213k
    }
11205
166k
11206
166k
  if (Swapped)
11207
1
    std::swap(BasePtr, Offset);
11208
166k
11209
166k
  // Now check for #3 and #4.
11210
166k
  bool RealUse = false;
11211
166k
11212
348k
  for (SDNode *Use : Ptr.getNode()->uses()) {
11213
348k
    if (Use == N)
11214
150k
      continue;
11215
198k
    
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
)
11216
45.9k
      return false;
11217
152k
11218
152k
    // If Ptr may be folded in addressing mode of other use, then it's
11219
152k
    // not profitable to do this transformation.
11220
152k
    
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
)
11221
36.9k
      RealUse = true;
11222
348k
  }
11223
166k
11224
120k
  
  if (!RealUse)
11225
87.2k
    return false;
11226
33.4k
11227
33.4k
  SDValue Result;
11228
33.4k
  if (isLoad)
11229
27.7k
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11230
27.7k
                                BasePtr, Offset, AM);
11231
33.4k
  else
11232
5.71k
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11233
5.71k
                                 BasePtr, Offset, AM);
11234
33.4k
  ++PreIndexedNodes;
11235
33.4k
  ++NodesCombined;
11236
33.4k
  DEBUG(dbgs() << "\nReplacing.4 ";
11237
33.4k
        N->dump(&DAG);
11238
33.4k
        dbgs() << "\nWith: ";
11239
33.4k
        Result.getNode()->dump(&DAG);
11240
33.4k
        dbgs() << '\n');
11241
33.4k
  WorklistRemover DeadNodes(*this);
11242
33.4k
  if (isLoad) {
11243
27.7k
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11244
27.7k
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11245
33.4k
  } else {
11246
5.71k
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11247
5.71k
  }
11248
33.4k
11249
33.4k
  // Finally, since the node is now dead, remove it from the graph.
11250
33.4k
  deleteAndRecombine(N);
11251
33.4k
11252
33.4k
  if (Swapped)
11253
1
    std::swap(BasePtr, Offset);
11254
33.4k
11255
33.4k
  // Replace other uses of BasePtr that can be updated to use Ptr
11256
35.9k
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
11257
2.53k
    unsigned OffsetIdx = 1;
11258
2.53k
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
11259
0
      OffsetIdx = 0;
11260
2.53k
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
11261
2.53k
           BasePtr.getNode() && "Expected BasePtr operand");
11262
2.53k
11263
2.53k
    // We need to replace ptr0 in the following expression:
11264
2.53k
    //   x0 * offset0 + y0 * ptr0 = t0
11265
2.53k
    // knowing that
11266
2.53k
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
11267
2.53k
    //
11268
2.53k
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
11269
2.53k
    // indexed load/store and the expression that needs to be re-written.
11270
2.53k
    //
11271
2.53k
    // Therefore, we have:
11272
2.53k
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
11273
2.53k
11274
2.53k
    ConstantSDNode *CN =
11275
2.53k
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
11276
2.53k
    int X0, X1, Y0, Y1;
11277
2.53k
    const APInt &Offset0 = CN->getAPIntValue();
11278
2.53k
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
11279
2.53k
11280
2.53k
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
11281
2.53k
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
11282
2.53k
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
11283
2.53k
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
11284
2.53k
11285
2.53k
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
11286
2.53k
11287
2.53k
    APInt CNV = Offset0;
11288
2.53k
    if (X0 < 0) CNV = -CNV;
11289
2.53k
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
11290
2.52k
    else CNV = CNV - Offset1;
11291
2.53k
11292
2.53k
    SDLoc DL(OtherUses[i]);
11293
2.53k
11294
2.53k
    // We can now generate the new expression.
11295
2.53k
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
11296
2.53k
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
11297
2.53k
11298
2.53k
    SDValue NewUse = DAG.getNode(Opcode,
11299
2.53k
                                 DL,
11300
2.53k
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
11301
2.53k
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
11302
2.53k
    deleteAndRecombine(OtherUses[i]);
11303
2.53k
  }
11304
33.4k
11305
33.4k
  // Replace the uses of Ptr with uses of the updated base value.
11306
33.4k
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
11307
14.0M
  deleteAndRecombine(Ptr.getNode());
11308
14.0M
11309
14.0M
  return true;
11310
14.0M
}
11311
11312
/// Try to combine a load/store with an add/sub of the base pointer node into a
11313
/// post-indexed load/store. The transformation folds the add/subtract into the
11314
/// new indexed load/store effectively, and all of its uses are redirected to the
11315
/// new load/store.
11316
14.0M
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
11317
14.0M
  if (Level < AfterLegalizeDAG)
11318
9.85M
    return false;
11319
4.20M
11320
4.20M
  bool isLoad = true;
11321
4.20M
  SDValue Ptr;
11322
4.20M
  EVT VT;
11323
4.20M
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
11324
2.26M
    if (LD->isIndexed())
11325
10.3k
      return false;
11326
2.25M
    VT = LD->getMemoryVT();
11327
2.25M
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
11328
260k
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
11329
260k
      return false;
11330
1.99M
    Ptr = LD->getBasePtr();
11331
4.20M
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
11332
1.93M
    if (ST->isIndexed())
11333
12.8k
      return false;
11334
1.92M
    VT = ST->getMemoryVT();
11335
1.92M
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
11336
317k
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
11337
317k
      return false;
11338
1.60M
    Ptr = ST->getBasePtr();
11339
1.60M
    isLoad = false;
11340
1.93M
  } else {
11341
0
    return false;
11342
0
  }
11343
3.60M
11344
3.60M
  
  if (Ptr.getNode()->hasOneUse())
11345
2.84M
    return false;
11346
760k
11347
760k
  
  for (SDNode *Op : Ptr.getNode()->uses()) {
11348
3.18M
    if (Op == N ||
11349
2.45M
        
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
11350
2.19M
      continue;
11351
986k
11352
986k
    SDValue BasePtr;
11353
986k
    SDValue Offset;
11354
986k
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
11355
986k
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
11356
811k
      // Don't create an indexed load / store with zero offset.
11357
811k
      if (isNullConstant(Offset))
11358
0
        continue;
11359
811k
11360
811k
      // Try turning it into a post-indexed load / store except when
11361
811k
      // 1) All uses are load / store ops that use it as base ptr (and
11362
811k
      //    it may be folded as addressing mode).
11363
811k
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
11364
811k
      //    nor a successor of N. Otherwise, if Op is folded that would
11365
811k
      //    create a cycle.
11366
811k
11367
811k
      
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
11368
188k
        continue;
11369
623k
11370
623k
      // Check for #1.
11371
623k
      bool TryNext = false;
11372
1.07M
      for (SDNode *Use : BasePtr.getNode()->uses()) {
11373
1.07M
        if (Use == Ptr.getNode())
11374
0
          continue;
11375
1.07M
11376
1.07M
        // If all the uses are load / store addresses, then don't do the
11377
1.07M
        // transformation.
11378
1.07M
        
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
11379
729k
          bool RealUse = false;
11380
788k
          for (SDNode *UseUse : Use->uses()) {
11381
788k
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
11382
165k
              RealUse = true;
11383
788k
          }
11384
729k
11385
729k
          if (!RealUse) {
11386
565k
            TryNext = true;
11387
565k
            break;
11388
565k
          }
11389
623k
        }
11390
1.07M
      }
11391
623k
11392
623k
      if (TryNext)
11393
565k
        continue;
11394
57.0k
11395
57.0k
      // Check for #2
11396
57.0k
      
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
11397
55.0k
        SDValue Result = isLoad
11398
37.1k
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
11399
37.1k
                               BasePtr, Offset, AM)
11400
17.8k
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
11401
17.8k
                                BasePtr, Offset, AM);
11402
55.0k
        ++PostIndexedNodes;
11403
55.0k
        ++NodesCombined;
11404
55.0k
        DEBUG(dbgs() << "\nReplacing.5 ";
11405
55.0k
              N->dump(&DAG);
11406
55.0k
              dbgs() << "\nWith: ";
11407
55.0k
              Result.getNode()->dump(&DAG);
11408
55.0k
              dbgs() << '\n');
11409
55.0k
        WorklistRemover DeadNodes(*this);
11410
55.0k
        if (isLoad) {
11411
37.1k
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
11412
37.1k
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
11413
55.0k
        } else {
11414
17.8k
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
11415
17.8k
        }
11416
55.0k
11417
55.0k
        // Finally, since the node is now dead, remove it from the graph.
11418
55.0k
        deleteAndRecombine(N);
11419
55.0k
11420
55.0k
        // Replace the uses of Use with uses of the updated base value.
11421
55.0k
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
11422
55.0k
                                      Result.getValue(isLoad ? 1 : 0));
11423
55.0k
        deleteAndRecombine(Op);
11424
55.0k
        return true;
11425
55.0k
      }
11426
705k
    }
11427
3.18M
  }
11428
705k
11429
705k
  return false;
11430
705k
}
11431
11432
/// \brief Return the base-pointer arithmetic from an indexed \p LD.
11433
4
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11434
4
  ISD::MemIndexedMode AM = LD->getAddressingMode();
11435
4
  assert(AM != ISD::UNINDEXED);
11436
4
  SDValue BP = LD->getOperand(1);
11437
4
  SDValue Inc = LD->getOperand(2);
11438
4
11439
4
  // Some backends use TargetConstants for load offsets, but don't expect
11440
4
  // TargetConstants in general ADD nodes. We can convert these constants into
11441
4
  // regular Constants (if the constant is not opaque).
11442
4
  assert((Inc.getOpcode() != ISD::TargetConstant ||
11443
4
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11444
4
         "Cannot split out indexing using opaque target constants");
11445
4
  if (Inc.getOpcode() == ISD::TargetConstant) {
11446
3
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11447
3
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11448
3
                          ConstInc->getValueType(0));
11449
3
  }
11450
4
11451
4
  unsigned Opc =
11452
4
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11453
4
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11454
4
}
11455
11456
6.04M
SDValue DAGCombiner::visitLOAD(SDNode *N) {
11457
6.04M
  LoadSDNode *LD  = cast<LoadSDNode>(N);
11458
6.04M
  SDValue Chain = LD->getChain();
11459
6.04M
  SDValue Ptr   = LD->getBasePtr();
11460
6.04M
11461
6.04M
  // If load is not volatile and there are no uses of the loaded value (and
11462
6.04M
  // the updated indexed value in case of indexed loads), change uses of the
11463
6.04M
  // chain value into uses of the chain input (i.e. delete the dead load).
11464
6.04M
  if (!LD->isVolatile()) {
11465
6.01M
    if (N->getValueType(1) == MVT::Other) {
11466
6.00M
      // Unindexed loads.
11467
6.00M
      if (!N->hasAnyUseOfValue(0)) {
11468
5.21k
        // It's not safe to use the two value CombineTo variant here. e.g.
11469
5.21k
        // v1, chain2 = load chain1, loc
11470
5.21k
        // v2, chain3 = load chain2, loc
11471
5.21k
        // v3         = add v2, c
11472
5.21k
        // Now we replace use of chain2 with chain1.  This makes the second load
11473
5.21k
        // isomorphic to the one we are deleting, and thus makes this load live.
11474
5.21k
        DEBUG(dbgs() << "\nReplacing.6 ";
11475
5.21k
              N->dump(&DAG);
11476
5.21k
              dbgs() << "\nWith chain: ";
11477
5.21k
              Chain.getNode()->dump(&DAG);
11478
5.21k
              dbgs() << "\n");
11479
5.21k
        WorklistRemover DeadNodes(*this);
11480
5.21k
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11481
5.21k
        AddUsersToWorklist(Chain.getNode());
11482
5.21k
        if (N->use_empty())
11483
5.21k
          deleteAndRecombine(N);
11484
5.21k
11485
5.21k
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11486
5.21k
      }
11487
10.3k
    } else {
11488
10.3k
      // Indexed loads.
11489
10.3k
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11490
10.3k
11491
10.3k
      // If this load has an opaque TargetConstant offset, then we cannot split
11492
10.3k
      // the indexing into an add/sub directly (that TargetConstant may not be
11493
10.3k
      // valid for a different type of node, and we cannot convert an opaque
11494
10.3k
      // target constant into a regular constant).
11495
10.3k
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11496
49
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11497
10.3k
11498
10.3k
      if (!N->hasAnyUseOfValue(0) &&
11499
10.3k
          
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11500
4
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11501
4
        SDValue Index;
11502
4
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11503
4
          Index = SplitIndexingFromLoad(LD);
11504
4
          // Try to fold the base pointer arithmetic into subsequent loads and
11505
4
          // stores.
11506
4
          AddUsersToWorklist(N);
11507
4
        } else
11508
0
          Index = DAG.getUNDEF(N->getValueType(1));
11509
4
        DEBUG(dbgs() << "\nReplacing.7 ";
11510
4
              N->dump(&DAG);
11511
4
              dbgs() << "\nWith: ";
11512
4
              Undef.getNode()->dump(&DAG);
11513
4
              dbgs() << " and 2 other values\n");
11514
4
        WorklistRemover DeadNodes(*this);
11515
4
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11516
4
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11517
4
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11518
4
        deleteAndRecombine(N);
11519
4
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11520
4
      }
11521
6.04M
    }
11522
6.01M
  }
11523
6.04M
11524
6.04M
  // If this load is directly stored, replace the load value with the stored
11525
6.04M
  // value.
11526
6.04M
  // TODO: Handle store large -> read small portion.
11527
6.04M
  // TODO: Handle TRUNCSTORE/LOADEXT
11528
6.04M
  
  if (OptLevel != CodeGenOpt::None &&
11529
6.04M
      
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
11530
4.49M
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11531
407k
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11532
407k
      if (PrevST->getBasePtr() == Ptr &&
11533
7.49k
          PrevST->getValue().getValueType() == N->getValueType(0))
11534
4.30k
        return CombineTo(N, PrevST->getOperand(1), Chain);
11535
6.03M
    }
11536
4.49M
  }
11537
6.03M
11538
6.03M
  // Try to infer better alignment information than the load already has.
11539
6.03M
  
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
) {
11540
6.00M
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11541
883k
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
11542
51.6k
        SDValue NewLoad = DAG.getExtLoad(
11543
51.6k
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11544
51.6k
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
11545
51.6k
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
11546
51.6k
        if (NewLoad.getNode() != N)
11547
0
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11548
6.03M
      }
11549
883k
    }
11550
6.00M
  }
11551
6.03M
11552
6.03M
  
  if (LD->isUnindexed()) {
11553
6.02M
    // Walk up chain skipping non-aliasing memory nodes.
11554
6.02M
    SDValue BetterChain = FindBetterChain(N, Chain);
11555
6.02M
11556
6.02M
    // If there is a better chain.
11557
6.02M
    if (Chain != BetterChain) {
11558
272k
      SDValue ReplLoad;
11559
272k
11560
272k
      // Replace the chain to avoid the dependency.
11561
272k
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11562
224k
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11563
224k
                               BetterChain, Ptr, LD->getMemOperand());
11564
272k
      } else {
11565
47.9k
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11566
47.9k
                                  LD->getValueType(0),
11567
47.9k
                                  BetterChain, Ptr, LD->getMemoryVT(),
11568
47.9k
                                  LD->getMemOperand());
11569
47.9k
      }
11570
272k
11571
272k
      // Create token factor to keep old chain connected.
11572
272k
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11573
272k
                                  MVT::Other, Chain, ReplLoad.getValue(1));
11574
272k
11575
272k
      // Replace uses with load result and token factor
11576
272k
      return CombineTo(N, ReplLoad.getValue(0), Token);
11577
272k
    }
11578
5.76M
  }
11579
5.76M
11580
5.76M
  // Try transforming N to an indexed load.
11581
5.76M
  
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11582
64.8k
    return SDValue(N, 0);
11583
5.69M
11584
5.69M
  // Try to slice up N to more direct loads if the slices are mapped to
11585
5.69M
  // different register banks or pairing can take place.
11586
5.69M
  
  if (SliceUpLoad(N))
11587
16.7k
    return SDValue(N, 0);
11588
5.68M
11589
5.68M
  return SDValue();
11590
5.68M
}
11591
11592
namespace {
11593
11594
/// \brief Helper structure used to slice a load in smaller loads.
11595
/// Basically a slice is obtained from the following sequence:
11596
/// Origin = load Ty1, Base
11597
/// Shift = srl Ty1 Origin, CstTy Amount
11598
/// Inst = trunc Shift to Ty2
11599
///
11600
/// Then, it will be rewritten into:
11601
/// Slice = load SliceTy, Base + SliceOffset
11602
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11603
///
11604
/// SliceTy is deduced from the number of bits that are actually used to
11605
/// build Inst.
11606
struct LoadedSlice {
11607
  /// \brief Helper structure used to compute the cost of a slice.
11608
  struct Cost {
11609
    /// Are we optimizing for code size.
11610
    bool ForCodeSize;
11611
11612
    /// Various cost.
11613
    unsigned Loads = 0;
11614
    unsigned Truncates = 0;
11615
    unsigned CrossRegisterBanksCopies = 0;
11616
    unsigned ZExts = 0;
11617
    unsigned Shift = 0;
11618
11619
33.5k
    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
11620
11621
    /// \brief Get the cost of one isolated slice.
11622
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11623
33.5k
        : ForCodeSize(ForCodeSize), Loads(1) {
11624
33.5k
      EVT TruncType = LS.Inst->getValueType(0);
11625
33.5k
      EVT LoadedType = LS.getLoadedType();
11626
33.5k
      if (TruncType != LoadedType &&
11627
0
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11628
0
        ZExts = 1;
11629
33.5k
    }
11630
11631
    /// \brief Account for slicing gain in the current cost.
11632
    /// Slicing provide a few gains like removing a shift or a
11633
    /// truncate. This method allows to grow the cost of the original
11634
    /// load with the gain from this slice.
11635
33.5k
    void addSliceGain(const LoadedSlice &LS) {
11636
33.5k
      // Each slice saves a truncate.
11637
33.5k
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11638
33.5k
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11639
33.5k
                              LS.Inst->getValueType(0)))
11640
12
        ++Truncates;
11641
33.5k
      // If there is a shift amount, this slice gets rid of it.
11642
33.5k
      if (LS.Shift)
11643
16.7k
        ++Shift;
11644
33.5k
      // If this slice can merge a cross register bank copy, account for it.
11645
33.5k
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
11646
4
        ++CrossRegisterBanksCopies;
11647
33.5k
    }
11648
11649
33.5k
    Cost &operator+=(const Cost &RHS) {
11650
33.5k
      Loads += RHS.Loads;
11651
33.5k
      Truncates += RHS.Truncates;
11652
33.5k
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11653
33.5k
      ZExts += RHS.ZExts;
11654
33.5k
      Shift += RHS.Shift;
11655
33.5k
      return *this;
11656
33.5k
    }
11657
11658
0
    bool operator==(const Cost &RHS) const {
11659
0
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11660
0
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11661
0
             ZExts == RHS.ZExts && Shift == RHS.Shift;
11662
0
    }
11663
11664
0
    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11665
11666
16.7k
    bool operator<(const Cost &RHS) const {
11667
16.7k
      // Assume cross register banks copies are as expensive as loads.
11668
16.7k
      // FIXME: Do we want some more target hooks?
11669
16.7k
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11670
16.7k
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11671
16.7k
      // Unless we are optimizing for code size, consider the
11672
16.7k
      // expensive operation first.
11673
16.7k
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11674
41
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
11675
16.7k
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11676
16.7k
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11677
16.7k
    }
11678
11679
16.7k
    bool operator>(const Cost &RHS) const { return RHS < *this; }
11680
11681
0
    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11682
11683
0
    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11684
  };
11685
11686
  // The last instruction that represent the slice. This should be a
11687
  // truncate instruction.
11688
  SDNode *Inst;
11689
11690
  // The original load instruction.
11691
  LoadSDNode *Origin;
11692
11693
  // The right shift amount in bits from the original load.
11694
  unsigned Shift;
11695
11696
  // The DAG from which Origin came from.
11697
  // This is used to get some contextual information about legal types, etc.
11698
  SelectionDAG *DAG;
11699
11700
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11701
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11702
37.7k
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11703
11704
  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11705
  /// \return Result is \p BitWidth and has used bits set to 1 and
11706
  ///         not used bits set to 0.
11707
209k
  APInt getUsedBits() const {
11708
209k
    // Reproduce the trunc(lshr) sequence:
11709
209k
    // - Start from the truncated value.
11710
209k
    // - Zero extend to the desired bit width.
11711
209k
    // - Shift left.
11712
209k
    assert(Origin && "No original load to compare against.");
11713
209k
    unsigned BitWidth = Origin->getValueSizeInBits(0);
11714
209k
    assert(Inst && "This slice is not bound to an instruction");
11715
209k
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11716
209k
           "Extracted slice is bigger than the whole type!");
11717
209k
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11718
209k
    UsedBits.setAllBits();
11719
209k
    UsedBits = UsedBits.zext(BitWidth);
11720
209k
    UsedBits <<= Shift;
11721
209k
    return UsedBits;
11722
209k
  }
11723
11724
  /// \brief Get the size of the slice to be loaded in bytes.
11725
138k
  unsigned getLoadedSize() const {
11726
138k
    unsigned SliceSize = getUsedBits().countPopulation();
11727
138k
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11728
138k
    return SliceSize / 8;
11729
138k
  }
11730
11731
  /// \brief Get the type that will be loaded for this slice.
11732
  /// Note: This may not be the final type for the slice.
11733
138k
  EVT getLoadedType() const {
11734
138k
    assert(DAG && "Missing context");
11735
138k
    LLVMContext &Ctxt = *DAG->getContext();
11736
138k
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11737
138k
  }
11738
11739
  /// \brief Get the alignment of the load used for this slice.
11740
50.2k
  unsigned getAlignment() const {
11741
50.2k
    unsigned Alignment = Origin->getAlignment();
11742
50.2k
    unsigned Offset = getOffsetFromBase();
11743
50.2k
    if (Offset != 0)
11744
16.7k
      Alignment = MinAlign(Alignment, Alignment + Offset);
11745
50.2k
    return Alignment;
11746
50.2k
  }
11747
11748
  /// \brief Check if this slice can be rewritten with legal operations.
11749
37.7k
  bool isLegal() const {
11750
37.7k
    // An invalid slice is not legal.
11751
37.7k
    if (!Origin || !Inst || !DAG)
11752
0
      return false;
11753
37.7k
11754
37.7k
    // Offsets are for indexed load only, we do not handle that.
11755
37.7k
    
    if (!Origin->getOffset().isUndef())
11756
0
      return false;
11757
37.7k
11758
37.7k
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11759
37.7k
11760
37.7k
    // Check that the type is legal.
11761
37.7k
    EVT SliceType = getLoadedType();
11762
37.7k
    if (!TLI.isTypeLegal(SliceType))
11763
56
      return false;
11764
37.6k
11765
37.6k
    // Check that the load is legal for this type.
11766
37.6k
    
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11767
296
      return false;
11768
37.3k
11769
37.3k
    // Check that the offset can be computed.
11770
37.3k
    // 1. Check its type.
11771
37.3k
    EVT PtrType = Origin->getBasePtr().getValueType();
11772
37.3k
    if (PtrType == MVT::Untyped || PtrType.isExtended())
11773
0
      return false;
11774
37.3k
11775
37.3k
    // 2. Check that it fits in the immediate.
11776
37.3k
    
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11777
0
      return false;
11778
37.3k
11779
37.3k
    // 3. Check that the computation is legal.
11780
37.3k
    
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11781
0
      return false;
11782
37.3k
11783
37.3k
    // Check that the zext is legal if it needs one.
11784
37.3k
    EVT TruncateType = Inst->getValueType(0);
11785
37.3k
    if (TruncateType != SliceType &&
11786
289
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11787
0
      return false;
11788
37.3k
11789
37.3k
    return true;
11790
37.3k
  }
11791
11792
  /// \brief Get the offset in bytes of this slice in the original chunk of
11793
  /// bits.
11794
  /// \pre DAG != nullptr.
11795
154k
  uint64_t getOffsetFromBase() const {
11796
154k
    assert(DAG && "Missing context.");
11797
154k
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11798
154k
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11799
154k
    uint64_t Offset = Shift / 8;
11800
154k
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11801
154k
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11802
154k
           "The size of the original loaded type is not a multiple of a"
11803
154k
           " byte.");
11804
154k
    // If Offset is bigger than TySizeInBytes, it means we are loading all
11805
154k
    // zeros. This should have been optimized before in the process.
11806
154k
    assert(TySizeInBytes > Offset &&
11807
154k
           "Invalid shift amount for given loaded size");
11808
154k
    if (IsBigEndian)
11809
86
      Offset = TySizeInBytes - Offset - getLoadedSize();
11810
154k
    return Offset;
11811
154k
  }
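
The big-endian adjustment above is the one place where LoadedSlice's offset arithmetic depends on the target. A minimal standalone sketch of the same computation in plain C++ (not LLVM code; the function name and the i64/i32 sample below are illustrative assumptions):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Byte offset of a slice obtained as trunc(srl(load OrigBits, Shift)).
uint64_t sliceOffsetFromBase(unsigned OrigBits, unsigned Shift,
                             unsigned LoadedBytes, bool IsBigEndian) {
  assert(!(Shift % 8) && "shift must be byte aligned");
  assert(!(OrigBits % 8) && "original type must be a whole number of bytes");
  uint64_t Offset = Shift / 8;          // little-endian: low bits come first
  uint64_t TySizeInBytes = OrigBits / 8;
  assert(TySizeInBytes > Offset && "shift would leave nothing to load");
  if (IsBigEndian)                      // big-endian: count from the high end
    Offset = TySizeInBytes - Offset - LoadedBytes;
  return Offset;
}

int main() {
  // Slice = trunc(srl(i64 x, 32)) to i32: bytes 4..7 on LE, bytes 0..3 on BE.
  std::printf("LE offset: %llu\n",
              (unsigned long long)sliceOffsetFromBase(64, 32, 4, false));
  std::printf("BE offset: %llu\n",
              (unsigned long long)sliceOffsetFromBase(64, 32, 4, true));
  return 0;
}
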
11812
11813
  /// \brief Generate the sequence of instructions to load the slice
11814
  /// represented by this object and redirect the uses of this slice to
11815
  /// this new sequence of instructions.
11816
  /// \pre this->Inst && this->Origin are valid Instructions and this
11817
  /// object passed the legal check: LoadedSlice::isLegal returned true.
11818
  /// \return The last instruction of the sequence used to load the slice.
11819
33.5k
  SDValue loadSlice() const {
11820
33.5k
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
11821
33.5k
    const SDValue &OldBaseAddr = Origin->getBasePtr();
11822
33.5k
    SDValue BaseAddr = OldBaseAddr;
11823
33.5k
    // Get the offset in that chunk of bytes w.r.t. the endianness.
11824
33.5k
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11825
33.5k
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11826
33.5k
    if (Offset) {
11827
16.7k
      // BaseAddr = BaseAddr + Offset.
11828
16.7k
      EVT ArithType = BaseAddr.getValueType();
11829
16.7k
      SDLoc DL(Origin);
11830
16.7k
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11831
16.7k
                              DAG->getConstant(Offset, DL, ArithType));
11832
16.7k
    }
11833
33.5k
11834
33.5k
    // Create the type of the loaded slice according to its size.
11835
33.5k
    EVT SliceType = getLoadedType();
11836
33.5k
11837
33.5k
    // Create the load for the slice.
11838
33.5k
    SDValue LastInst =
11839
33.5k
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11840
33.5k
                     Origin->getPointerInfo().getWithOffset(Offset),
11841
33.5k
                     getAlignment(), Origin->getMemOperand()->getFlags());
11842
33.5k
    // If the final type is not the same as the loaded type, this means that
11843
33.5k
    // we have to pad with zero. Create a zero extend for that.
11844
33.5k
    EVT FinalType = Inst->getValueType(0);
11845
33.5k
    if (SliceType != FinalType)
11846
1
      LastInst =
11847
1
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11848
33.5k
    return LastInst;
11849
33.5k
  }
11850
11851
  /// \brief Check if this slice can be merged with an expensive cross register
11852
  /// bank copy. E.g.,
11853
  /// i = load i32
11854
  /// f = bitcast i32 i to float
11855
33.5k
  bool canMergeExpensiveCrossRegisterBankCopy() const {
11856
33.5k
    if (!Inst || !Inst->hasOneUse())
11857
6.97k
      return false;
11858
26.6k
    SDNode *Use = *Inst->use_begin();
11859
26.6k
    if (Use->getOpcode() != ISD::BITCAST)
11860
26.6k
      return false;
11861
26.6k
    assert(DAG && "Missing context");
11862
4
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11863
4
    EVT ResVT = Use->getValueType(0);
11864
4
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11865
4
    const TargetRegisterClass *ArgRC =
11866
4
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11867
4
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11868
0
      return false;
11869
4
11870
4
    // At this point, we know that we perform a cross-register-bank copy.
11871
4
    // Check if it is expensive.
11872
4
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11873
4
    // Assume bitcasts are cheap, unless both register classes do not
11874
4
    // explicitly share a common sub class.
11875
4
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11876
0
      return false;
11877
4
11878
4
    // Check if it will be merged with the load.
11879
4
    // 1. Check the alignment constraint.
11880
4
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11881
4
        ResVT.getTypeForEVT(*DAG->getContext()));
11882
4
11883
4
    if (RequiredAlignment > getAlignment())
11884
0
      return false;
11885
4
11886
4
    // 2. Check that the load is a legal operation for that type.
11887
4
    
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11888
0
      return false;
11889
4
11890
4
    // 3. Check that we do not have a zext in the way.
11891
4
    
    if (Inst->getValueType(0) != getLoadedType())
11892
0
      return false;
11893
4
11894
4
    return true;
11895
4
  }
11896
};
11897
11898
} // end anonymous namespace
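
To make the transformation that LoadedSlice models concrete, here is a standalone sketch in ordinary C++ (not LLVM code): a wide load whose only uses are trunc and trunc(lshr ..., 32) behaves the same as two narrower loads at the right byte offsets. It assumes a little-endian host, and every name in it is illustrative.

#include <cstdint>
#include <cstdio>
#include <cstring>

uint32_t low_via_wide_load(const unsigned char *P) {
  uint64_t Wide;                       // Origin = load i64, Base
  std::memcpy(&Wide, P, sizeof(Wide));
  return (uint32_t)Wide;               // Inst = trunc i64 -> i32
}

uint32_t high_via_wide_load(const unsigned char *P) {
  uint64_t Wide;
  std::memcpy(&Wide, P, sizeof(Wide));
  return (uint32_t)(Wide >> 32);       // Inst = trunc(srl Origin, 32)
}

uint32_t low_via_slice(const unsigned char *P) {
  uint32_t Slice;                      // Slice = load i32, Base + 0
  std::memcpy(&Slice, P, sizeof(Slice));
  return Slice;
}

uint32_t high_via_slice(const unsigned char *P) {
  uint32_t Slice;                      // Slice = load i32, Base + 4 (LE)
  std::memcpy(&Slice, P + 4, sizeof(Slice));
  return Slice;
}

int main() {
  unsigned char Buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  std::printf("%d %d\n", low_via_wide_load(Buf) == low_via_slice(Buf),
              high_via_wide_load(Buf) == high_via_slice(Buf));
  return 0;
}
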
11899
11900
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11901
/// \p UsedBits looks like 0..0 1..1 0..0.
11902
33.5k
static bool areUsedBitsDense(const APInt &UsedBits) {
11903
33.5k
  // If all the bits are one, this is dense!
11904
33.5k
  if (UsedBits.isAllOnesValue())
11905
33.5k
    return true;
11906
2
11907
2
  // Get rid of the unused bits on the right.
11908
2
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11909
2
  // Get rid of the unused bits on the left.
11910
2
  if (NarrowedUsedBits.countLeadingZeros())
11911
1
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11912
33.5k
  // Check that the chunk of bits is completely used.
11913
33.5k
  return NarrowedUsedBits.isAllOnesValue();
11914
33.5k
}
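
A minimal standalone sketch of the same 0..0 1..1 0..0 test on a plain uint64_t (illustrative only; the real code works on APInt, and treating an all-zero mask as not dense is an assumption of this sketch):

#include <cassert>
#include <cstdint>

bool areUsedBitsDense64(uint64_t UsedBits) {
  if (UsedBits == 0)
    return false;                 // assumption: an empty mask is not dense
  while ((UsedBits & 1) == 0)     // drop the unused bits on the right
    UsedBits >>= 1;
  // What remains must be of the form 0..01..1, i.e. one contiguous run.
  return (UsedBits & (UsedBits + 1)) == 0;
}

int main() {
  assert(areUsedBitsDense64(0x00ffff00u));   // one contiguous run
  assert(!areUsedBitsDense64(0x00ff00ffu));  // two separate runs
  return 0;
}
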
11915
11916
/// \brief Check whether or not \p First and \p Second are next to each other
11917
/// in memory. This means that there is no hole between the bits loaded
11918
/// by \p First and the bits loaded by \p Second.
11919
static bool areSlicesNextToEachOther(const LoadedSlice &First,
11920
16.7k
                                     const LoadedSlice &Second) {
11921
16.7k
  assert(First.Origin == Second.Origin && First.Origin &&
11922
16.7k
         "Unable to match different memory origins.");
11923
16.7k
  APInt UsedBits = First.getUsedBits();
11924
16.7k
  assert((UsedBits & Second.getUsedBits()) == 0 &&
11925
16.7k
         "Slices are not supposed to overlap.");
11926
16.7k
  UsedBits |= Second.getUsedBits();
11927
16.7k
  return areUsedBitsDense(UsedBits);
11928
16.7k
}
11929
11930
/// \brief Adjust the \p GlobalLSCost according to the target
11931
/// pairing capabilities and the layout of the slices.
11932
/// \pre \p GlobalLSCost should account for at least as many loads as
11933
/// there is in the slices in \p LoadedSlices.
11934
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11935
16.7k
                                 LoadedSlice::Cost &GlobalLSCost) {
11936
16.7k
  unsigned NumberOfSlices = LoadedSlices.size();
11937
16.7k
  // If there is less than 2 elements, no pairing is possible.
11938
16.7k
  if (NumberOfSlices < 2)
11939
0
    return;
11940
16.7k
11941
16.7k
  // Sort the slices so that elements that are likely to be next to each
11942
16.7k
  // other in memory are next to each other in the list.
11943
16.7k
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11944
16.7k
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11945
16.7k
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11946
16.7k
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11947
16.7k
  });
11948
16.7k
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11949
16.7k
  // First (resp. Second) is the first (resp. Second) potentially candidate
11950
16.7k
  // to be placed in a paired load.
11951
16.7k
  const LoadedSlice *First = nullptr;
11952
16.7k
  const LoadedSlice *Second = nullptr;
11953
50.3k
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11954
33.5k
                // Set the beginning of the pair.
11955
16.7k
                                                           First = Second) {
11956
33.5k
    Second = &LoadedSlices[CurrSlice];
11957
33.5k
11958
33.5k
    // If First is NULL, it means we start a new pair.
11959
33.5k
    // Get to the next slice.
11960
33.5k
    if (!First)
11961
16.7k
      continue;
11962
16.7k
11963
16.7k
    EVT LoadedType = First->getLoadedType();
11964
16.7k
11965
16.7k
    // If the types of the slices are different, we cannot pair them.
11966
16.7k
    if (LoadedType != Second->getLoadedType())
11967
0
      continue;
11968
16.7k
11969
16.7k
    // Check if the target supplies paired loads for this type.
11970
16.7k
    unsigned RequiredAlignment = 0;
11971
16.7k
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11972
40
      // move to the next pair, this type is hopeless.
11973
40
      Second = nullptr;
11974
40
      continue;
11975
40
    }
11976
16.7k
    // Check if we meet the alignment requirement.
11977
16.7k
    
    if (RequiredAlignment > First->getAlignment())
11978
0
      continue;
11979
16.7k
11980
16.7k
    // Check that both loads are next to each other in memory.
11981
16.7k
    
    if (!areSlicesNextToEachOther(*First, *Second))
11982
0
      continue;
11983
16.7k
11984
16.7k
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11985
16.7k
    --GlobalLSCost.Loads;
11986
16.7k
    // Move to the next pair.
11987
16.7k
    Second = nullptr;
11988
16.7k
  }
11989
16.7k
}
11990
11991
/// \brief Check the profitability of all involved LoadedSlice.
11992
/// Currently, it is considered profitable if there is exactly two
11993
/// involved slices (1) which are (2) next to each other in memory, and
11994
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11995
///
11996
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
11997
/// the elements themselves.
11998
///
11999
/// FIXME: When the cost model will be mature enough, we can relax
12000
/// constraints (1) and (2).
12001
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12002
17.2k
                                const APInt &UsedBits, bool ForCodeSize) {
12003
17.2k
  unsigned NumberOfSlices = LoadedSlices.size();
12004
17.2k
  if (StressLoadSlicing)
12005
2
    return NumberOfSlices > 1;
12006
17.2k
12007
17.2k
  // Check (1).
12008
17.2k
  
  if (NumberOfSlices != 2)
12009
401
    return false;
12010
16.8k
12011
16.8k
  // Check (2).
12012
16.8k
  
  if (!areUsedBitsDense(UsedBits))
12013
2
    return false;
12014
16.7k
12015
16.7k
  // Check (3).
12016
16.7k
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
12017
16.7k
  // The original code has one big load.
12018
16.7k
  OrigCost.Loads = 1;
12019
50.3k
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
12020
33.5k
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
12021
33.5k
    // Accumulate the cost of all the slices.
12022
33.5k
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
12023
33.5k
    GlobalSlicingCost += SliceCost;
12024
33.5k
12025
33.5k
    // Account as cost in the original configuration the gain obtained
12026
33.5k
    // with the current slices.
12027
33.5k
    OrigCost.addSliceGain(LS);
12028
33.5k
  }
12029
17.2k
12030
17.2k
  // If the target supports paired load, adjust the cost accordingly.
12031
17.2k
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12032
17.2k
  return OrigCost > GlobalSlicingCost;
12033
17.2k
}
12034
12035
/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
12036
/// operations, split it in the various pieces being extracted.
12037
///
12038
/// This sort of thing is introduced by SROA.
12039
/// This slicing takes care not to insert overlapping loads.
12040
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
12041
5.69M
bool DAGCombiner::SliceUpLoad(SDNode *N) {
12042
5.69M
  if (Level < AfterLegalizeDAG)
12043
3.46M
    return false;
12044
2.23M
12045
2.23M
  LoadSDNode *LD = cast<LoadSDNode>(N);
12046
2.23M
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
12047
1.73M
      !LD->getValueType(0).isInteger())
12048
611k
    return false;
12049
1.62M
12050
1.62M
  // Keep track of already used bits to detect overlapping values.
12051
1.62M
  // In that case, we will just abort the transformation.
12052
1.62M
  APInt UsedBits(LD->getValueSizeInBits(0), 0);
12053
1.62M
12054
1.62M
  SmallVector<LoadedSlice, 4> LoadedSlices;
12055
1.62M
12056
1.62M
  // Check if this load is used as several smaller chunks of bits.
12057
1.62M
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
12058
1.62M
  // of computation for each trunc.
12059
1.62M
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
12060
2.05M
       
       UI != UIEnd; ++UI) {
12061
2.04M
    // Skip the uses of the chain.
12062
2.04M
    if (UI.getUse().getResNo() != 0)
12063
399k
      continue;
12064
1.64M
12065
1.64M
    SDNode *User = *UI;
12066
1.64M
    unsigned Shift = 0;
12067
1.64M
12068
1.64M
    // Check if this is a trunc(lshr).
12069
1.64M
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
12070
1.64M
        
        isa<ConstantSDNode>(User->getOperand(1))) {
12071
18.6k
      Shift = User->getConstantOperandVal(1);
12072
18.6k
      User = *User->use_begin();
12073
18.6k
    }
12074
1.64M
12075
1.64M
    // At this point, User is a Truncate, iff we encountered, trunc or
12076
1.64M
    // trunc(lshr).
12077
1.64M
    if (User->getOpcode() != ISD::TRUNCATE)
12078
1.60M
      return false;
12079
37.7k
12080
37.7k
    // The width of the type must be a power of 2 and greater than 8-bits.
12081
37.7k
    // Otherwise the load cannot be represented in LLVM IR.
12082
37.7k
    // Moreover, if we shifted with a non-8-bits multiple, the slice
12083
37.7k
    // will be across several bytes. We do not support that.
12084
37.7k
    unsigned Width = User->getValueSizeInBits(0);
12085
37.7k
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
12086
46
      return false;
12087
37.7k
12088
37.7k
    // Build the slice for this chain of computations.
12089
37.7k
    LoadedSlice LS(User, LD, Shift, &DAG);
12090
37.7k
    APInt CurrentUsedBits = LS.getUsedBits();
12091
37.7k
12092
37.7k
    // Check if this slice overlaps with another.
12093
37.7k
    if ((CurrentUsedBits & UsedBits) != 0)
12094
15
      return false;
12095
37.7k
    // Update the bits used globally.
12096
37.7k
    UsedBits |= CurrentUsedBits;
12097
37.7k
12098
37.7k
    // Check if the new slice would be legal.
12099
37.7k
    if (!LS.isLegal())
12100
352
      return false;
12101
37.3k
12102
37.3k
    // Record the slice.
12103
37.3k
    LoadedSlices.push_back(LS);
12104
37.3k
  }
12105
1.62M
12106
1.62M
  // Abort slicing if it does not seem to be profitable.
12107
17.2k
  
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
12108
442
    return false;
12109
16.7k
12110
16.7k
  ++SlicedLoads;
12111
16.7k
12112
16.7k
  // Rewrite each chain to use an independent load.
12113
16.7k
  // By construction, each chain can be represented by a unique load.
12114
16.7k
12115
16.7k
  // Prepare the argument for the new token factor for all the slices.
12116
16.7k
  SmallVector<SDValue, 8> ArgChains;
12117
16.7k
  for (SmallVectorImpl<LoadedSlice>::const_iterator
12118
16.7k
           LSIt = LoadedSlices.begin(),
12119
16.7k
           LSItEnd = LoadedSlices.end();
12120
50.2k
       
       LSIt != LSItEnd; ++LSIt) {
12121
33.5k
    SDValue SliceInst = LSIt->loadSlice();
12122
33.5k
    CombineTo(LSIt->Inst, SliceInst, true);
12123
33.5k
    if (SliceInst.getOpcode() != ISD::LOAD)
12124
1
      SliceInst = SliceInst.getOperand(0);
12125
33.5k
    assert(SliceInst->getOpcode() == ISD::LOAD &&
12126
33.5k
           "It takes more than a zext to get to the loaded slice!!");
12127
33.5k
    ArgChains.push_back(SliceInst.getValue(1));
12128
33.5k
  }
12129
5.69M
12130
5.69M
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12131
5.69M
                              ArgChains);
12132
5.69M
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12133
5.69M
  AddToWorklist(Chain.getNode());
12134
5.69M
  return true;
12135
5.69M
}
12136
12137
/// Check to see if V is (and load (ptr), imm), where the load is having
12138
/// specific bytes cleared out.  If so, return the byte size being masked out
12139
/// and the shift amount.
12140
static std::pair<unsigned, unsigned>
12141
30.3k
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
12142
30.3k
  std::pair<unsigned, unsigned> Result(0, 0);
12143
30.3k
12144
30.3k
  // Check for the structure we're looking for.
12145
30.3k
  if (V->getOpcode() != ISD::AND ||
12146
10.7k
      !isa<ConstantSDNode>(V->getOperand(1)) ||
12147
9.42k
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
12148
23.6k
    return Result;
12149
6.76k
12150
6.76k
  // Check the chain and pointer.
12151
6.76k
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
12152
6.76k
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
12153
3.78k
12154
3.78k
  // The store should be chained directly to the load or be an operand of a
12155
3.78k
  // tokenfactor.
12156
3.78k
  
  if (LD == Chain.getNode())
12157
1.92k
    ; // ok.
12158
1.86k
  else if (Chain->getOpcode() != ISD::TokenFactor)
12159
6
    return Result; // Fail.
12160
1.85k
  else {
12161
1.85k
    bool isOk = false;
12162
1.85k
    for (const SDValue &ChainOp : Chain->op_values())
12163
4.21k
      
      if (ChainOp.getNode() == LD) {
12164
1.85k
        isOk = true;
12165
1.85k
        break;
12166
1.85k
      }
12167
1.85k
    if (!isOk) return Result;
12168
3.77k
  }
12169
3.77k
12170
3.77k
  // This only handles simple types.
12171
3.77k
  
  if (V.getValueType() != MVT::i16 &&
12172
3.19k
      V.getValueType() != MVT::i32 &&
12173
325
      V.getValueType() != MVT::i64)
12174
266
    return Result;
12175
3.51k
12176
3.51k
  // Check the constant mask.  Invert it so that the bits being masked out are
12177
3.51k
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
12178
3.51k
  // follow the sign bit for uniformity.
12179
3.51k
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12180
3.51k
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
12181
3.51k
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
12182
1.66k
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
12183
1.66k
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
12184
1.04k
  
  if (NotMaskLZ == 64) return Result;  // All zero mask.
12185
1.04k
12186
1.04k
  // See if we have a continuous run of bits.  If so, we have 0*1+0*
12187
1.04k
  
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12188
38
    return Result;
12189
1.00k
12190
1.00k
  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12191
1.00k
  
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
12192
787
    NotMaskLZ -= 64-V.getValueSizeInBits();
12193
1.00k
12194
1.00k
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12195
1.00k
  switch (MaskedBytes) {
12196
1.00k
  case 1:
12197
1.00k
  case 2:
12198
1.00k
  case 4: break;
12199
2
  default: return Result; // All one mask, or 5-byte mask.
12200
1.00k
  }
12201
1.00k
12202
1.00k
  // Verify that the first bit starts at a multiple of mask so that the access
12203
1.00k
  // is aligned the same as the access width.
12204
1.00k
  
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12205
1.00k
12206
1.00k
  Result.first = MaskedBytes;
12207
1.00k
  Result.second = NotMaskTZ/8;
12208
1.00k
  return Result;
12209
1.00k
}
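
The mask bookkeeping above can be checked in isolation. The following standalone sketch uses plain integers as stand-ins for the APInt/MathExtras helpers (the function name and the example mask are assumptions; the real routine also verifies the load/store chain):

#include <cstdint>
#include <cstdio>
#include <utility>

// Returns {MaskedBytes, ByteShift}, or {0, 0} if the sign-extended AND mask
// does not clear one contiguous, byte-aligned chunk of 1, 2 or 4 bytes.
std::pair<unsigned, unsigned> analyzeMask(uint64_t SExtMask, unsigned ValueBits) {
  uint64_t NotMask = ~SExtMask;                    // the bits being cleared
  if (NotMask == 0)
    return {0, 0};
  unsigned LZ = 0, TZ = 0;
  for (uint64_t M = NotMask; !(M >> 63); M <<= 1) ++LZ;   // countLeadingZeros
  for (uint64_t M = NotMask; !(M & 1); M >>= 1) ++TZ;     // countTrailingZeros
  if ((LZ & 7) || (TZ & 7))
    return {0, 0};                                 // must be whole bytes
  uint64_t Run = NotMask >> TZ;                    // cleared bits, right-aligned
  if ((Run & (Run + 1)) != 0)
    return {0, 0};                                 // not one contiguous run
  unsigned LZInValue = LZ ? LZ - (64 - ValueBits) : 0;    // drop the i64 padding
  unsigned MaskedBytes = (ValueBits - LZInValue - TZ) / 8;
  if (MaskedBytes != 1 && MaskedBytes != 2 && MaskedBytes != 4)
    return {0, 0};
  return {MaskedBytes, TZ / 8};
}

int main() {
  // i32 AND 0xFFFF00FF (sign-extended to i64) clears byte 1 only.
  std::pair<unsigned, unsigned> R = analyzeMask(0xFFFFFFFFFFFF00FFULL, 32);
  std::printf("MaskedBytes=%u ByteShift=%u\n", R.first, R.second);  // 1 and 1
  return 0;
}
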
12210
12211
/// Check to see if IVal is something that provides a value as specified by
12212
/// MaskInfo. If so, replace the specified store with a narrower store of
12213
/// truncated IVal.
12214
static SDNode *
12215
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12216
                                SDValue IVal, StoreSDNode *St,
12217
1.00k
                                DAGCombiner *DC) {
12218
1.00k
  unsigned NumBytes = MaskInfo.first;
12219
1.00k
  unsigned ByteShift = MaskInfo.second;
12220
1.00k
  SelectionDAG &DAG = DC->getDAG();
12221
1.00k
12222
1.00k
  // Check to see if IVal is all zeros in the part being masked in by the 'or'
12223
1.00k
  // that uses this.  If not, this is not a replacement.
12224
1.00k
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12225
1.00k
                                  ByteShift*8, (ByteShift+NumBytes)*8);
12226
1.00k
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12227
1.00k
12228
1.00k
  // Check that it is legal on the target to do this.  It is legal if the new
12229
1.00k
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12230
1.00k
  // legalization.
12231
1.00k
  MVT VT = MVT::getIntegerVT(NumBytes*8);
12232
1.00k
  if (!DC->isTypeLegal(VT))
12233
966
    return nullptr;
12234
38
12235
38
  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12236
38
  // shifted by ByteShift and truncated down to NumBytes.
12237
38
  
  if (ByteShift) {
12238
26
    SDLoc DL(IVal);
12239
26
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12240
26
                       DAG.getConstant(ByteShift*8, DL,
12241
26
                                    DC->getShiftAmountTy(IVal.getValueType())));
12242
26
  }
12243
38
12244
38
  // Figure out the offset for the store and the alignment of the access.
12245
38
  unsigned StOffset;
12246
38
  unsigned NewAlign = St->getAlignment();
12247
38
12248
38
  if (DAG.getDataLayout().isLittleEndian())
12249
38
    StOffset = ByteShift;
12250
38
  else
12251
0
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12252
38
12253
38
  SDValue Ptr = St->getBasePtr();
12254
38
  if (StOffset) {
12255
26
    SDLoc DL(IVal);
12256
26
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12257
26
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12258
26
    NewAlign = MinAlign(NewAlign, StOffset);
12259
26
  }
12260
1.00k
12261
1.00k
  // Truncate down to the new size.
12262
1.00k
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12263
1.00k
12264
1.00k
  ++OpsNarrowed;
12265
1.00k
  return DAG
12266
1.00k
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12267
1.00k
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12268
1.00k
      .getNode();
12269
1.00k
}
12270
12271
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12272
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12273
/// narrowing the load and store if it would end up being a win for performance
12274
/// or code size.
12275
8.29M
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
12276
8.29M
  StoreSDNode *ST  = cast<StoreSDNode>(N);
12277
8.29M
  if (ST->isVolatile())
12278
38.6k
    return SDValue();
12279
8.25M
12280
8.25M
  SDValue Chain = ST->getChain();
12281
8.25M
  SDValue Value = ST->getValue();
12282
8.25M
  SDValue Ptr   = ST->getBasePtr();
12283
8.25M
  EVT VT = Value.getValueType();
12284
8.25M
12285
8.25M
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
12286
5.63M
    return SDValue();
12287
2.61M
12288
2.61M
  unsigned Opc = Value.getOpcode();
12289
2.61M
12290
2.61M
  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
12291
2.61M
  // is a byte mask indicating a consecutive number of bytes, check to see if
12292
2.61M
  // Y is known to provide just those bytes.  If so, we try to replace the
12293
2.61M
  // load + replace + store sequence with a single (narrower) store, which makes
12294
2.61M
  // the load dead.
12295
2.61M
  if (Opc == ISD::OR) {
12296
15.2k
    std::pair<unsigned, unsigned> MaskedLoad;
12297
15.2k
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
12298
15.2k
    if (MaskedLoad.first)
12299
1.00k
      
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12300
1.00k
                                                  Value.getOperand(1), ST,this))
12301
33
        return SDValue(NewST, 0);
12302
15.1k
12303
15.1k
    // Or is commutative, so try swapping X and Y.
12304
15.1k
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
12305
15.1k
    if (MaskedLoad.first)
12306
5
      
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12307
5
                                                  Value.getOperand(0), ST,this))
12308
5
        return SDValue(NewST, 0);
12309
2.61M
  }
12310
2.61M
12311
2.61M
  
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
12312
28.5k
      Value.getOperand(1).getOpcode() != ISD::Constant)
12313
2.60M
    return SDValue();
12314
14.1k
12315
14.1k
  SDValue N0 = Value.getOperand(0);
12316
14.1k
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12317
14.1k
      
      Chain == SDValue(N0.getNode(), 1)) {
12318
2.58k
    LoadSDNode *LD = cast<LoadSDNode>(N0);
12319
2.58k
    if (LD->getBasePtr() != Ptr ||
12320
2.28k
        LD->getPointerInfo().getAddrSpace() !=
12321
2.28k
        ST->getPointerInfo().getAddrSpace())
12322
306
      return SDValue();
12323
2.28k
12324
2.28k
    // Find the type to narrow it the load / op / store to.
12325
2.28k
    SDValue N1 = Value.getOperand(1);
12326
2.28k
    unsigned BitWidth = N1.getValueSizeInBits();
12327
2.28k
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
12328
2.28k
    if (Opc == ISD::AND)
12329
680
      Imm ^= APInt::getAllOnesValue(BitWidth);
12330
2.28k
    if (Imm == 0 || Imm.isAllOnesValue())
12331
8
      return SDValue();
12332
2.27k
    unsigned ShAmt = Imm.countTrailingZeros();
12333
2.27k
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
12334
2.27k
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
12335
2.27k
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12336
2.27k
    // The narrowing should be profitable, the load/store operation should be
12337
2.27k
    // legal (or custom) and the store size should be equal to the NewVT width.
12338
10.4k
    while (NewBW < BitWidth &&
12339
8.23k
           (NewVT.getStoreSizeInBits() != NewBW ||
12340
2.73k
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
12341
10.4k
            
            !TLI.isNarrowingProfitable(VT, NewVT))) {
12342
8.20k
      NewBW = NextPowerOf2(NewBW);
12343
8.20k
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12344
8.20k
    }
12345
2.27k
    if (NewBW >= BitWidth)
12346
2.24k
      return SDValue();
12347
27
12348
27
    // If the lsb changed does not start at the type bitwidth boundary,
12349
27
    // start at the previous one.
12350
27
    
    if (ShAmt % NewBW)
12351
18
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
12352
27
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
12353
27
                                   std::min(BitWidth, ShAmt + NewBW));
12354
27
    if ((Imm & Mask) == Imm) {
12355
20
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
12356
20
      if (Opc == ISD::AND)
12357
12
        NewImm ^= APInt::getAllOnesValue(NewBW);
12358
20
      uint64_t PtrOff = ShAmt / 8;
12359
20
      // For big endian targets, we need to adjust the offset to the pointer to
12360
20
      // load the correct bytes.
12361
20
      if (DAG.getDataLayout().isBigEndian())
12362
0
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
12363
20
12364
20
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
12365
20
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
12366
20
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
12367
0
        return SDValue();
12368
20
12369
20
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
12370
20
                                   Ptr.getValueType(), Ptr,
12371
20
                                   DAG.getConstant(PtrOff, SDLoc(LD),
12372
20
                                                   Ptr.getValueType()));
12373
20
      SDValue NewLD =
12374
20
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
12375
20
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12376
20
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
12377
20
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
12378
20
                                   DAG.getConstant(NewImm, SDLoc(Value),
12379
20
                                                   NewVT));
12380
20
      SDValue NewST =
12381
20
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
12382
20
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
12383
20
12384
20
      AddToWorklist(NewPtr.getNode());
12385
20
      AddToWorklist(NewLD.getNode());
12386
20
      AddToWorklist(NewVal.getNode());
12387
20
      WorklistRemover DeadNodes(*this);
12388
20
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
12389
20
      ++OpsNarrowed;
12390
20
      return NewST;
12391
20
    }
12392
2.58k
  }
12393
11.5k
12394
11.5k
  return SDValue();
12395
11.5k
}
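
A standalone sketch of the width/shift computation above (illustrative only: nextPowerOf2 mimics the behaviour of LLVM's NextPowerOf2, the sample immediate is an assumption, and the real code additionally checks legality and profitability of the narrow type):

#include <cassert>
#include <cstdint>
#include <cstdio>

struct Narrowing { unsigned ShAmt; unsigned NewBW; };

unsigned nextPowerOf2(unsigned A) {      // smallest power of two strictly > A
  unsigned P = 1;
  while (P <= A) P <<= 1;
  return P;
}

Narrowing computeNarrowing(uint64_t Imm, unsigned BitWidth) {
  assert(Imm != 0 && "callers reject an all-zero immediate");
  unsigned ShAmt = 0, MSB = 0;
  for (uint64_t M = Imm; !(M & 1); M >>= 1) ++ShAmt;      // countTrailingZeros
  for (unsigned i = 0; i < BitWidth; ++i)
    if (Imm & (1ULL << i)) MSB = i;                       // highest changed bit
  unsigned NewBW = nextPowerOf2(MSB - ShAmt);
  // If the changed bits do not start on a NewBW boundary, back up one chunk,
  // exactly as the code above does before re-checking the mask.
  if (ShAmt % NewBW)
    ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
  return {ShAmt, NewBW};
}

int main() {
  // "or i64 x, 0x0000FF0000000000" only changes byte 5: an 8-bit op at bit 40.
  Narrowing N = computeNarrowing(0x0000FF0000000000ULL, 64);
  std::printf("ShAmt=%u NewBW=%u\n", N.ShAmt, N.NewBW);   // 40 and 8
  return 0;
}
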
12396
12397
/// For a given floating point load / store pair, if the load value isn't used
12398
/// by any other operations, then consider transforming the pair to integer
12399
/// load / store operations if the target deems the transformation profitable.
12400
8.90M
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12401
8.90M
  StoreSDNode *ST  = cast<StoreSDNode>(N);
12402
8.90M
  SDValue Chain = ST->getChain();
12403
8.90M
  SDValue Value = ST->getValue();
12404
8.90M
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12405
697k
      Value.hasOneUse() &&
12406
8.90M
      
      Chain == SDValue(Value.getNode(), 1)) {
12407
180k
    LoadSDNode *LD = cast<LoadSDNode>(Value);
12408
180k
    EVT VT = LD->getMemoryVT();
12409
180k
    if (!VT.isFloatingPoint() ||
12410
1.00k
        VT != ST->getMemoryVT() ||
12411
1.00k
        LD->isNonTemporal() ||
12412
1.00k
        ST->isNonTemporal() ||
12413
1.00k
        LD->getPointerInfo().getAddrSpace() != 0 ||
12414
653
        ST->getPointerInfo().getAddrSpace() != 0)
12415
179k
      return SDValue();
12416
600
12417
600
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12418
600
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12419
256
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12420
256
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12421
3
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12422
597
      return SDValue();
12423
3
12424
3
    unsigned LDAlign = LD->getAlignment();
12425
3
    unsigned STAlign = ST->getAlignment();
12426
3
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12427
3
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12428
3
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
12429
0
      return SDValue();
12430
3
12431
3
    SDValue NewLD =
12432
3
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12433
3
                    LD->getPointerInfo(), LDAlign);
12434
3
12435
3
    SDValue NewST =
12436
3
        DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12437
3
                     ST->getPointerInfo(), STAlign);
12438
3
12439
3
    AddToWorklist(NewLD.getNode());
12440
3
    AddToWorklist(NewST.getNode());
12441
3
    WorklistRemover DeadNodes(*this);
12442
3
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12443
3
    ++LdStFP2Int;
12444
3
    return NewST;
12445
3
  }
12446
8.72M
12447
8.72M
  return SDValue();
12448
8.72M
}
12449
12450
// This is a helper function for visitMUL to check the profitability
12451
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12452
// MulNode is the original multiply, AddNode is (add x, c1),
12453
// and ConstNode is c2.
12454
//
12455
// If the (add x, c1) has multiple uses, we could increase
12456
// the number of adds if we make this transformation.
12457
// It would only be worth doing this if we can remove a
12458
// multiply in the process. Check for that here.
12459
// To illustrate:
12460
//     (A + c1) * c3
12461
//     (A + c2) * c3
12462
// We're checking for cases where we have common "c3 * A" expressions.
12463
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12464
                                              SDValue &AddNode,
12465
977
                                              SDValue &ConstNode) {
12466
977
  APInt Val;
12467
977
12468
977
  // If the add only has one use, this would be OK to do.
12469
977
  if (AddNode.getNode()->hasOneUse())
12470
509
    return true;
12471
468
12472
468
  // Walk all the users of the constant with which we're multiplying.
12473
468
  
  for (SDNode *Use : ConstNode->uses()) {
12474
1.78k
    if (Use == MulNode) // This use is the one we're on right now. Skip it.
12475
452
      continue;
12476
1.32k
12477
1.32k
    
    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12478
1.20k
      SDNode *OtherOp;
12479
1.20k
      SDNode *MulVar = AddNode.getOperand(0).getNode();
12480
1.20k
12481
1.20k
      // OtherOp is what we're multiplying against the constant.
12482
1.20k
      if (Use->getOperand(0) == ConstNode)
12483
0
        OtherOp = Use->getOperand(1).getNode();
12484
1.20k
      else
12485
1.20k
        OtherOp = Use->getOperand(0).getNode();
12486
1.20k
12487
1.20k
      // Check to see if multiply is with the same operand of our "add".
12488
1.20k
      //
12489
1.20k
      //     ConstNode  = CONST
12490
1.20k
      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12491
1.20k
      //     ...
12492
1.20k
      //     AddNode  = (A + c1)  <-- MulVar is A.
12493
1.20k
      //         = AddNode * ConstNode   <-- current visiting instruction.
12494
1.20k
      //
12495
1.20k
      // If we make this transformation, we will have a common
12496
1.20k
      // multiply (ConstNode * A) that we can save.
12497
1.20k
      if (OtherOp == MulVar)
12498
30
        return true;
12499
1.17k
12500
1.17k
      // Now check to see if a future expansion will give us a common
12501
1.17k
      // multiply.
12502
1.17k
      //
12503
1.17k
      //     ConstNode  = CONST
12504
1.17k
      //     AddNode    = (A + c1)
12505
1.17k
      //     ...   = AddNode * ConstNode <-- current visiting instruction.
12506
1.17k
      //     ...
12507
1.17k
      //     OtherOp = (A + c2)
12508
1.17k
      //     Use     = OtherOp * ConstNode <-- visiting Use.
12509
1.17k
      //
12510
1.17k
      // If we make this transformation, we will have a common
12511
1.17k
      // multiply (CONST * A) after we also do the same transformation
12512
1.17k
      // to the "t2" instruction.
12513
1.17k
      
      if (OtherOp->getOpcode() == ISD::ADD &&
12514
1.01k
          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12515
1.00k
          OtherOp->getOperand(0).getNode() == MulVar)
12516
2
        return true;
12517
436
    }
12518
1.78k
  }
12519
436
12520
436
  // Didn't find a case where this would be profitable.
12521
436
  return false;
12522
436
}
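
A worked standalone example of the profitability argument (names and constants are illustrative assumptions): distributing the constant multiply over both adds exposes a common "A * c3" term that can be computed once.

#include <cassert>

int before1(int A, int c1, int c3) { return (A + c1) * c3; }
int before2(int A, int c2, int c3) { return (A + c2) * c3; }

// After the transform: one shared multiply, plus adds of folded constants.
void after(int A, int c1, int c2, int c3, int &R1, int &R2) {
  int Common = A * c3;       // the common "c3 * A" the check above looks for
  R1 = Common + c1 * c3;     // c1 * c3 folds to a constant at compile time
  R2 = Common + c2 * c3;
}

int main() {
  int R1 = 0, R2 = 0;
  after(7, 3, 5, 11, R1, R2);
  assert(R1 == before1(7, 3, 11) && R2 == before2(7, 5, 11));
  return 0;
}
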
12523
12524
59.3M
static SDValue peekThroughBitcast(SDValue V) {
12525
59.4M
  while (V.getOpcode() == ISD::BITCAST)
12526
84.1k
    V = V.getOperand(0);
12527
59.3M
  return V;
12528
59.3M
}
12529
12530
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12531
117k
                                         unsigned NumStores) {
12532
117k
  SmallVector<SDValue, 8> Chains;
12533
117k
  SmallPtrSet<const SDNode *, 8> Visited;
12534
117k
  SDLoc StoreDL(StoreNodes[0].MemNode);
12535
117k
12536
354k
  for (unsigned i = 0; i < NumStores; ++i) {
12537
236k
    Visited.insert(StoreNodes[i].MemNode);
12538
236k
  }
12539
117k
12540
117k
  // don't include nodes that are children
12541
354k
  for (unsigned i = 0; i < NumStores; ++i) {
12542
236k
    if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12543
236k
      Chains.push_back(StoreNodes[i].MemNode->getChain());
12544
236k
  }
12545
117k
12546
117k
  assert(Chains.size() > 0 && "Chain should have generated a chain");
12547
117k
  return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12548
117k
}
12549
12550
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12551
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
12552
114k
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
12553
114k
  // Make sure we have something to merge.
12554
114k
  if (NumStores < 2)
12555
0
    return false;
12556
114k
12557
114k
  // The latest Node in the DAG.
12558
114k
  SDLoc DL(StoreNodes[0].MemNode);
12559
114k
12560
114k
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12561
114k
  unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12562
114k
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12563
114k
12564
114k
  EVT StoreTy;
12565
114k
  if (UseVector) {
12566
28.7k
    unsigned Elts = NumStores * NumMemElts;
12567
28.7k
    // Get the type for the merged vector store.
12568
28.7k
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12569
28.7k
  } else
12570
86.0k
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12571
114k
12572
114k
  SDValue StoredVal;
12573
114k
  if (UseVector) {
12574
28.7k
    if (IsConstantSrc) {
12575
28.5k
      SmallVector<SDValue, 8> BuildVector;
12576
85.8k
      for (unsigned I = 0; I != NumStores; ++I) {
12577
57.2k
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12578
57.2k
        SDValue Val = St->getValue();
12579
57.2k
        // If constant is of the wrong type, convert it now.
12580
57.2k
        if (MemVT != Val.getValueType()) {
12581
60
          Val = peekThroughBitcast(Val);
12582
60
          // Deal with constants of wrong size.
12583
60
          if (
ElementSizeBytes * 8 != Val.getValueSizeInBits()60
) {
12584
0
            EVT IntMemVT =
12585
0
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
12586
0
            if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val))
12587
0
              Val = DAG.getConstant(
12588
0
                  CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(
12589
0
                      8 * ElementSizeBytes),
12590
0
                  SDLoc(CFP), IntMemVT);
12591
0
            else if (auto *C = dyn_cast<ConstantSDNode>(Val))
12592
0
              Val = DAG.getConstant(
12593
0
                  C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes),
12594
0
                  SDLoc(C), IntMemVT);
12595
0
          }
12596
60
          // Make sure correctly size type is the correct type.
12597
60
          Val = DAG.getBitcast(MemVT, Val);
12598
60
        }
12599
57.2k
        BuildVector.push_back(Val);
12600
57.2k
      }
12601
0
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12602
28.5k
                                               : ISD::BUILD_VECTOR,
12603
28.5k
                              DL, StoreTy, BuildVector);
12604
28.7k
    } else {
12605
129
      SmallVector<SDValue, 8> Ops;
12606
449
      for (unsigned i = 0; i < NumStores; ++i) {
12607
320
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12608
320
        SDValue Val = peekThroughBitcast(St->getValue());
12609
320
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
12610
320
        // type MemVT. If the underlying value is not the correct
12611
320
        // type, but it is an extraction of an appropriate vector we
12612
320
        // can recast Val to be of the correct type. This may require
12613
320
        // converting between EXTRACT_VECTOR_ELT and
12614
320
        // EXTRACT_SUBVECTOR.
12615
320
        if ((MemVT != Val.getValueType()) &&
12616
2
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12617
320
             
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
12618
2
          SDValue Vec = Val.getOperand(0);
12619
2
          EVT MemVTScalarTy = MemVT.getScalarType();
12620
2
          // We may need to add a bitcast here to get types to line up.
12621
2
          if (MemVTScalarTy != Vec.getValueType()) {
12622
2
            unsigned Elts = Vec.getValueType().getSizeInBits() /
12623
2
                            MemVTScalarTy.getSizeInBits();
12624
2
            EVT NewVecTy =
12625
2
                EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
12626
2
            Vec = DAG.getBitcast(NewVecTy, Vec);
12627
2
          }
12628
0
          auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
12629
2
                                        : ISD::EXTRACT_VECTOR_ELT;
12630
2
          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
12631
2
        }
12632
320
        Ops.push_back(Val);
12633
320
      }
12634
129
12635
129
      // Build the extracted vector elements back into a vector.
12636
11
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12637
118
                                               : ISD::BUILD_VECTOR,
12638
129
                              DL, StoreTy, Ops);
12639
129
    }
12640
114k
  } else {
12641
86.0k
    // We should always use a vector store when merging extracted vector
12642
86.0k
    // elements, so this path implies a store of constants.
12643
86.0k
    assert(IsConstantSrc && "Merged vector elements should use vector store");
12644
86.0k
12645
86.0k
    APInt StoreInt(SizeInBits, 0);
12646
86.0k
12647
86.0k
    // Construct a single integer constant which is made of the smaller
12648
86.0k
    // constant inputs.
12649
86.0k
    bool IsLE = DAG.getDataLayout().isLittleEndian();
12650
258k
    for (unsigned i = 0; i < NumStores; ++i) {
12651
172k
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12652
172k
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12653
172k
12654
172k
      SDValue Val = St->getValue();
12655
172k
      StoreInt <<= ElementSizeBytes * 8;
12656
172k
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12657
172k
        StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
12658
172k
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12659
146
        StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
12660
146
      } else {
12661
0
        llvm_unreachable("Invalid constant element type");
12662
146
      }
12663
172k
    }
12664
86.0k
12665
86.0k
    // Create the new Load and Store operations.
12666
86.0k
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12667
86.0k
  }
12668
114k
12669
114k
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12670
114k
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12671
114k
12672
114k
  // make sure we use trunc store if it's necessary to be legal.
12673
114k
  SDValue NewStore;
12674
114k
  if (!UseTrunc) {
12675
106k
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12676
106k
                            FirstInChain->getPointerInfo(),
12677
106k
                            FirstInChain->getAlignment());
12678
114k
  } else { // Must be realized as a trunc store
12679
8.61k
    EVT LegalizedStoredValueTy =
12680
8.61k
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12681
8.61k
    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12682
8.61k
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12683
8.61k
    SDValue ExtendedStoreVal =
12684
8.61k
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12685
8.61k
                        LegalizedStoredValueTy);
12686
8.61k
    NewStore = DAG.getTruncStore(
12687
8.61k
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12688
8.61k
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12689
8.61k
        FirstInChain->getAlignment(),
12690
8.61k
        FirstInChain->getMemOperand()->getFlags());
12691
8.61k
  }
12692
114k
12693
114k
  // Replace all merged stores with the new store.
12694
345k
  for (unsigned i = 0; i < NumStores; ++i)
12695
230k
    CombineTo(StoreNodes[i].MemNode, NewStore);
12696
114k
12697
114k
  AddToWorklist(NewChain.getNode());
12698
114k
  return true;
12699
114k
}
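
The constant-packing loop above is easy to illustrate on its own. A minimal standalone sketch (plain C++; the function name and sample values are assumptions) showing why the loop walks the stores in reverse on little-endian targets, so that the first store's value lands in the lowest bytes of the merged constant:

#include <cstdint>
#include <cstdio>

uint64_t mergeConstantStores(const uint32_t *Vals, unsigned NumStores,
                             unsigned ElementSizeBytes, bool IsLE) {
  uint64_t StoreInt = 0;
  for (unsigned i = 0; i < NumStores; ++i) {
    unsigned Idx = IsLE ? (NumStores - 1 - i) : i;   // reverse order on LE
    StoreInt <<= ElementSizeBytes * 8;
    StoreInt |= Vals[Idx];
  }
  return StoreInt;
}

int main() {
  // Two adjacent i32 constant stores merged into one i64 store.
  uint32_t Vals[2] = {0x11111111u, 0x22222222u};
  std::printf("merged LE value: 0x%016llx\n",
              (unsigned long long)mergeConstantStores(Vals, 2, 4, true));
  // Prints 0x2222222211111111: Vals[0] occupies the low half, matching its
  // lower address.
  return 0;
}
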
12700
12701
void DAGCombiner::getStoreMergeCandidates(
12702
2.41M
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12703
2.41M
  // This holds the base pointer, index, and the offset in bytes from the base
12704
2.41M
  // pointer.
12705
2.41M
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12706
2.41M
  EVT MemVT = St->getMemoryVT();
12707
2.41M
12708
2.41M
  SDValue Val = peekThroughBitcast(St->getValue());
12709
2.41M
  // We must have a base and an offset.
12710
2.41M
  if (!BasePtr.getBase().getNode())
12711
0
    return;
12712
2.41M
12713
2.41M
  // Do not handle stores to undef base pointers.
12714
2.41M
  
  if (BasePtr.getBase().isUndef())
12715
823
    return;
12716
2.41M
12717
2.41M
  
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
12718
2.41M
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12719
2.40M
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12720
2.41M
  bool IsLoadSrc = isa<LoadSDNode>(Val);
12721
2.41M
  BaseIndexOffset LBasePtr;
12722
2.41M
  // Match on loadbaseptr if relevant.
12723
2.41M
  EVT LoadVT;
12724
2.41M
  if (IsLoadSrc) {
12725
414k
    auto *Ld = cast<LoadSDNode>(Val);
12726
414k
    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12727
414k
    LoadVT = Ld->getMemoryVT();
12728
414k
    // Load and store should be the same type.
12729
414k
    if (MemVT != LoadVT)
12730
26.5k
      return;
12731
2.39M
  }
12732
2.39M
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
12733
33.6M
                            int64_t &Offset) -> bool {
12734
33.6M
    if (Other->isVolatile() || Other->isIndexed())
12735
21.1k
      return false;
12736
33.5M
    SDValue Val = peekThroughBitcast(Other->getValue());
12737
33.5M
    // Allow merging constants of different types as integers.
12738
13.9M
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
12739
19.6M
                                           : Other->getMemoryVT() != MemVT;
12740
33.5M
    if (IsLoadSrc) {
12741
1.78M
      if (NoTypeMatch)
12742
577k
        return false;
12743
1.20M
      // The Load's Base Ptr must also match
12744
1.20M
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
12745
1.11M
        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
12746
1.11M
        if (LoadVT != OtherLd->getMemoryVT())
12747
2.22k
          return false;
12748
1.11M
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
12749
19.8k
          return false;
12750
1.20M
      } else
12751
88.1k
        return false;
12752
32.8M
    }
12753
32.8M
    if (IsConstantSrc) {
12754
12.1M
      if (NoTypeMatch)
12755
6.78M
        return false;
12756
5.40M
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
12757
1.53M
        return false;
12758
24.5M
    }
12759
24.5M
    if (IsExtractVecSrc) {
12760
19.6M
      // Do not merge truncated stores here.
12761
19.6M
      if (Other->isTruncatingStore())
12762
16.4k
        return false;
12763
19.5M
      if (!MemVT.bitsEq(Val.getValueType()))
12764
2.82M
        return false;
12765
16.7M
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
12766
16.7M
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
12767
4.91k
        return false;
12768
21.7M
    }
12769
21.7M
    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
12770
21.7M
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
12771
21.7M
  };
12772
2.39M
12773
2.39M
  // We are looking for a root node which is an ancestor to all mergeable
12774
2.39M
  // stores. We search up through a load, to our root and then down
12775
2.39M
  // through all children. For instance we will find Store{1,2,3} if
12776
2.39M
  // St is Store1, Store2, or Store3 where the root is not a load,
12777
2.39M
  // which is always true for nonvolatile ops. TODO: Expand
12778
2.39M
  // the search to find all valid candidates through multiple layers of loads.
12779
2.39M
  //
12780
2.39M
  // Root
12781
2.39M
  // |-------|-------|
12782
2.39M
  // Load    Load    Store3
12783
2.39M
  // |       |
12784
2.39M
  // Store1   Store2
12785
2.39M
  //
12786
2.39M
  // FIXME: We should be able to climb and
12787
2.39M
  // descend TokenFactors to find candidates as well.
12788
2.39M
12789
2.39M
  SDNode *RootNode = (St->getChain()).getNode();
12790
2.39M
12791
2.39M
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12792
381k
    RootNode = Ldn->getChain().getNode();
12793
1.27M
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12794
898k
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12795
2.57M
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
12796
2.16M
          if (I2.getOperandNo() == 0)
12797
1.66M
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
12798
1.40M
              BaseIndexOffset Ptr;
12799
1.40M
              int64_t PtrDiff;
12800
1.40M
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
12801
591k
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12802
898k
            }
12803
381k
  } else
12804
38.2M
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12805
36.2M
      if (I.getOperandNo() == 0)
12806
36.1M
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12807
32.1M
          BaseIndexOffset Ptr;
12808
32.1M
          int64_t PtrDiff;
12809
32.1M
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
12810
21.0M
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
12811
2.01M
        }
12812
2.41M
}
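A minimal sketch of the base-plus-offset comparison that the candidate match above relies on, written over plain values rather than the SelectionDAG BaseIndexOffset type (the names here are illustrative assumptions, not LLVM API):

#include <cstdint>
#include <optional>

struct BaseOffset {            // stand-in for a decomposed address
  const void *Base;
  int64_t Offset;              // byte offset from Base
};

// Returns the byte distance between B and A when both addresses hang off the
// same base pointer; otherwise the candidate cannot be merged.
std::optional<int64_t> equalBaseIndex(const BaseOffset &A, const BaseOffset &B) {
  if (A.Base != B.Base)
    return std::nullopt;
  return B.Offset - A.Offset;
}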
12813
12814
// We need to check that merging these stores does not cause a loop in
12815
// the DAG. Any store candidate may depend on another candidate
12816
// indirectly through its operand (we already consider dependencies
12817
// through the chain). Check in parallel by searching up from
12818
// non-chain operands of candidates.
12819
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12820
1.14M
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
12821
1.14M
  // FIXME: We should be able to truncate a full search of
12822
1.14M
  // predecessors by doing a BFS and keeping tabs the originating
12823
1.14M
  // stores from which worklist nodes come from in a similar way to
12824
1.14M
  // TokenFactor simplification.
12825
1.14M
12826
1.14M
  SmallPtrSet<const SDNode *, 16> Visited;
12827
1.14M
  SmallVector<const SDNode *, 8> Worklist;
12828
1.14M
  unsigned int Max = 8192;
12829
1.14M
  // Search Ops of store candidates.
12830
20.2M
  for (unsigned i = 0; i < NumStores; ++i) {
12831
19.1M
    SDNode *n = StoreNodes[i].MemNode;
12832
19.1M
    // Potential loops may happen only through non-chain operands
12833
76.5M
    for (unsigned j = 1; j < n->getNumOperands(); ++j)
12834
57.4M
      Worklist.push_back(n->getOperand(j).getNode());
12835
19.1M
  }
12836
1.14M
  // Search through DAG. We can stop early if we find a store node.
12837
20.2M
  for (unsigned i = 0; i < NumStores; ++i) {
12838
19.1M
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
12839
19.1M
                                     Max))
12840
2.99k
      return false;
12841
19.1M
    // Check if we ended early, failing conservatively if so.
12842
19.1M
    if (Visited.size() >= Max)
12843
0
      return false;
12844
19.1M
  }
12845
1.14M
  return true;
12846
1.14M
}
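A minimal sketch of the bounded predecessor search used above, with hypothetical node types in place of SDNode; the point is the conservative bail-out once the visited set reaches the cap:

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Operands; };  // hypothetical

// Returns true if Target is reachable from Worklist, or if the search grows
// past Max visited nodes (in which case we give up and report "reachable" to
// stay conservative), mirroring the Visited/Max logic above.
bool mayReach(const Node *Target, std::vector<const Node *> Worklist,
              unsigned Max = 8192) {
  std::unordered_set<const Node *> Visited;
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                      // already seen
    if (N == Target || Visited.size() >= Max)
      return true;
    for (const Node *Op : N->Operands)
      Worklist.push_back(Op);
  }
  return false;
}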
12847
12848
5.68M
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12849
5.68M
  if (OptLevel == CodeGenOpt::None)
12850
7.53k
    return false;
12851
5.67M
12852
5.67M
  EVT MemVT = St->getMemoryVT();
12853
5.67M
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12854
5.67M
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12855
5.67M
12856
5.67M
  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12857
241k
    return false;
12858
5.43M
12859
5.43M
  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12860
5.43M
      Attribute::NoImplicitFloat);
12861
5.43M
12862
5.43M
  // This function cannot currently deal with non-byte-sized memory sizes.
12863
5.43M
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12864
32.4k
    return false;
12865
5.40M
12866
5.40M
  if (!MemVT.isSimple())
12867
1.66k
    return false;
12868
5.40M
12869
5.40M
  // Perform an early exit check. Do not bother looking at stored values that
12870
5.40M
  // are not constants, loads, or extracted vector elements.
12871
5.40M
  SDValue StoredVal = peekThroughBitcast(St->getValue());
12872
5.40M
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12873
5.40M
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12874
4.10M
                       isa<ConstantFPSDNode>(StoredVal);
12875
5.40M
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12876
5.39M
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12877
5.40M
12878
5.40M
  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12879
2.98M
    return false;
12880
2.41M
12881
2.41M
  SmallVector<MemOpLink, 8> StoreNodes;
12882
2.41M
  // Find potential store merge candidates by searching through chain sub-DAG
12883
2.41M
  getStoreMergeCandidates(St, StoreNodes);
12884
2.41M
12885
2.41M
  // Check if there is anything to merge.
12886
2.41M
  if (StoreNodes.size() < 2)
12887
744k
    return false;
12888
1.67M
12889
1.67M
  // Sort the memory operands according to their distance from the
12890
1.67M
  // base pointer.
12891
1.67M
  std::sort(StoreNodes.begin(), StoreNodes.end(),
12892
104M
            [](MemOpLink LHS, MemOpLink RHS) {
12893
104M
              return LHS.OffsetFromBase < RHS.OffsetFromBase;
12894
104M
            });
12895
1.67M
12896
1.67M
  // Store Merge attempts to merge the lowest stores. This generally
12897
1.67M
  // works out as if successful, as the remaining stores are checked
12898
1.67M
  // after the first collection of stores is merged. However, in the
12899
1.67M
  // case that a non-mergeable store is found first, e.g., {p[-2],
12900
1.67M
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
12901
1.67M
  // mergeable cases. To prevent this, we prune such stores from the
12902
1.67M
  // front of StoreNodes here.
12903
1.67M
12904
1.67M
  bool RV = false;
12905
2.82M
  while (StoreNodes.size() > 1) {
12906
1.97M
    unsigned StartIdx = 0;
12907
4.31M
    while ((StartIdx + 1 < StoreNodes.size()) &&
12908
3.48M
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
12909
3.48M
               StoreNodes[StartIdx + 1].OffsetFromBase)
12910
2.34M
      ++StartIdx;
12911
1.97M
12912
1.97M
    // Bail if we don't have enough candidates to merge.
12913
1.97M
    if (StartIdx + 1 >= StoreNodes.size())
12914
826k
      return RV;
12915
1.14M
12916
1.14M
    if (StartIdx)
12917
151k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
12918
1.14M
12919
1.14M
    // Scan the memory operations on the chain and find the first
12920
1.14M
    // non-consecutive store memory address.
12921
1.14M
    unsigned NumConsecutiveStores = 1;
12922
1.14M
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12923
1.14M
    // Check that the addresses are consecutive starting from the second
12924
1.14M
    // element in the list of stores.
12925
19.1M
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12926
18.1M
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12927
18.1M
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12928
168k
        break;
12929
17.9M
      NumConsecutiveStores = i + 1;
12930
17.9M
    }
12931
1.14M
12932
1.14M
    if (NumConsecutiveStores < 2) {
12933
0
      StoreNodes.erase(StoreNodes.begin(),
12934
0
                       StoreNodes.begin() + NumConsecutiveStores);
12935
0
      continue;
12936
0
    }
12937
1.14M
12938
1.14M
    // Check that we can merge these candidates without causing a cycle
12939
1.14M
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
12940
1.14M
                                                  NumConsecutiveStores)) {
12941
2.99k
      StoreNodes.erase(StoreNodes.begin(),
12942
2.99k
                       StoreNodes.begin() + NumConsecutiveStores);
12943
2.99k
      continue;
12944
2.99k
    }
12945
1.14M
12946
1.14M
    // The node with the lowest store address.
12947
1.14M
    LLVMContext &Context = *DAG.getContext();
12948
1.14M
    const DataLayout &DL = DAG.getDataLayout();
12949
1.14M
12950
1.14M
    // Store the constants into memory as one consecutive store.
12951
1.14M
    if (IsConstantSrc) {
12952
209k
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12953
209k
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12954
209k
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
12955
209k
      unsigned LastLegalType = 1;
12956
209k
      unsigned LastLegalVectorType = 1;
12957
209k
      bool LastIntegerTrunc = false;
12958
209k
      bool NonZero = false;
12959
209k
      unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
12960
960k
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12961
750k
        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12962
750k
        SDValue StoredVal = ST->getValue();
12963
750k
        bool IsElementZero = false;
12964
750k
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
12965
679k
          IsElementZero = C->isNullValue();
12966
70.6k
        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
12967
70.6k
          IsElementZero = C->getConstantFPValue()->isNullValue();
12968
750k
        if (IsElementZero) {
12969
262k
          if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
12970
18.0k
            FirstZeroAfterNonZero = i;
12971
262k
        }
12972
750k
        NonZero |= !IsElementZero;
12973
750k
12974
750k
        // Find a legal type for the constant store.
12975
750k
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12976
750k
        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12977
750k
        bool IsFast = false;
12978
750k
        if (TLI.isTypeLegal(StoreTy) &&
12979
234k
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
12980
233k
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12981
233k
                                   FirstStoreAlign, &IsFast) &&
12982
750k
            IsFast) {
12983
232k
          LastIntegerTrunc = false;
12984
232k
          LastLegalType = i + 1;
12985
232k
          // Or check whether a truncstore is legal.
12986
750k
        } else if (TLI.getTypeAction(Context, StoreTy) ==
12987
518k
                   TargetLowering::TypePromoteInteger) {
12988
322k
          EVT LegalizedStoredValueTy =
12989
322k
              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12990
322k
          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12991
63.9k
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12992
63.9k
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12993
63.9k
                                     FirstStoreAlign, &IsFast) &&
12994
322k
              IsFast) {
12995
63.6k
            LastIntegerTrunc = true;
12996
63.6k
            LastLegalType = i + 1;
12997
63.6k
          }
12998
518k
        }
12999
750k
13000
750k
        // We only use vectors if the constant is known to be zero or the target
13001
750k
        // allows it and the function is not marked with the noimplicitfloat
13002
750k
        // attribute.
13003
750k
        if ((!NonZero ||
13004
519k
             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
13005
750k
            !NoVectors) {
13006
265k
          // Find a legal type for the vector store.
13007
265k
          unsigned Elts = (i + 1) * NumMemElts;
13008
265k
          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13009
265k
          if (TLI.isTypeLegal(Ty) &&
13010
73.8k
              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13011
67.8k
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13012
67.8k
                                     FirstStoreAlign, &IsFast) &&
13013
67.0k
              IsFast)
13014
59.8k
            LastLegalVectorType = i + 1;
13015
265k
        }
13016
750k
      }
13017
209k
13018
28.5k
      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
13019
209k
      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
13020
209k
13021
209k
      // Check if we found a legal integer type that creates a meaningful merge.
13022
209k
      if (NumElem < 2) {
13023
95.2k
        // We know that candidate stores are in order and of correct
13024
95.2k
        // shape. While there is no mergeable sequence from the
13025
95.2k
        // beginning one may start later in the sequence. The only
13026
95.2k
        // reason a merge of size N could have failed where another of
13027
95.2k
        // the same size would not have, is if the alignment has
13028
95.2k
        // improved or we've dropped a non-zero value. Drop as many
13029
95.2k
        // candidates as we can here.
13030
95.2k
        unsigned NumSkip = 1;
13031
95.2k
        while (
13032
434k
            (NumSkip < NumConsecutiveStores) &&
13033
357k
            (NumSkip < FirstZeroAfterNonZero) &&
13034
344k
            (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
13035
339k
          NumSkip++;
13036
339k
        }
13037
95.2k
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13038
95.2k
        continue;
13039
95.2k
      }
13040
114k
13041
114k
      bool Merged = MergeStoresOfConstantsOrVecElts(
13042
114k
          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
13043
114k
      RV |= Merged;
13044
114k
13045
114k
      // Remove merged stores for next iteration.
13046
114k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13047
114k
      continue;
13048
114k
    }
13049
934k
13050
934k
    // When extracting multiple vector elements, try to store them
13051
934k
    // in one vector store rather than a sequence of scalar stores.
13052
934k
    if (IsExtractVecSrc) {
13053
688k
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13054
688k
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13055
688k
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
13056
688k
      unsigned NumStoresToMerge = 1;
13057
17.3M
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13058
16.6M
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13059
16.6M
        SDValue StVal = peekThroughBitcast(St->getValue());
13060
16.6M
        // This restriction could be loosened.
13061
16.6M
        // Bail out if any stored values are not elements extracted from a
13062
16.6M
        // vector. It should be possible to handle mixed sources, but load
13063
16.6M
        // sources need more careful handling (see the block of code below that
13064
16.6M
        // handles consecutive loads).
13065
16.6M
        if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13066
16.6M
            StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13067
0
          return RV;
13068
16.6M
13069
16.6M
        // Find a legal type for the vector store.
13070
16.6M
        unsigned Elts = (i + 1) * NumMemElts;
13071
16.6M
        EVT Ty =
13072
16.6M
            EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13073
16.6M
        bool IsFast;
13074
16.6M
        if (TLI.isTypeLegal(Ty) &&
13075
1.37M
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13076
1.37M
            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13077
1.37M
                                   FirstStoreAlign, &IsFast) &&
13078
1.37M
            IsFast)
13079
686k
          NumStoresToMerge = i + 1;
13080
16.6M
      }
13081
688k
13082
688k
      // Check if we found a legal integer type that creates a meaningful merge.
13083
688k
      if (NumStoresToMerge < 2) {
13084
688k
        // We know that candidate stores are in order and of correct
13085
688k
        // shape. While there is no mergeable sequence from the
13086
688k
        // beginning one may start later in the sequence. The only
13087
688k
        // reason a merge of size N could have failed where another of
13088
688k
        // the same size would not have, is if the alignment has
13089
688k
        // improved. Drop as many candidates as we can here.
13090
688k
        unsigned NumSkip = 1;
13091
16.6M
        while ((NumSkip < NumConsecutiveStores) &&
13092
15.9M
               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13093
15.9M
          NumSkip++;
13094
688k
13095
688k
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13096
688k
        continue;
13097
688k
      }
13098
129
13099
129
      bool Merged = MergeStoresOfConstantsOrVecElts(
13100
129
          StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13101
129
      if (!Merged) {
13102
0
        StoreNodes.erase(StoreNodes.begin(),
13103
0
                         StoreNodes.begin() + NumStoresToMerge);
13104
0
        continue;
13105
0
      }
13106
129
      // Remove merged stores for next iteration.
13107
129
      StoreNodes.erase(StoreNodes.begin(),
13108
129
                       StoreNodes.begin() + NumStoresToMerge);
13109
129
      RV = true;
13110
129
      continue;
13111
129
    }
13112
246k
13113
246k
    // Below we handle the case of multiple consecutive stores that
13114
246k
    // come from multiple consecutive loads. We merge them into a single
13115
246k
    // wide load and a single wide store.
13116
246k
13117
246k
    // Look for load nodes which are used by the stored values.
13118
246k
    SmallVector<MemOpLink, 8> LoadNodes;
13119
246k
13120
246k
    // Find acceptable loads. Loads need to have the same chain (token factor),
13121
246k
    // must not be zext, volatile, indexed, and they must be consecutive.
13122
246k
    BaseIndexOffset LdBasePtr;
13123
511k
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13124
415k
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13125
415k
      SDValue Val = peekThroughBitcast(St->getValue());
13126
415k
      LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
13127
415k
      if (!Ld)
13128
0
        break;
13129
415k
13130
415k
      // Loads must only have one use.
13131
415k
      if (!Ld->hasNUsesOfValue(1, 0))
13132
150k
        break;
13133
265k
13134
265k
      // The memory operands must not be volatile.
13135
265k
      if (Ld->isVolatile() || Ld->isIndexed())
13136
219
        break;
13137
264k
13138
264k
      // The stored memory type must be the same.
13139
264k
      if (Ld->getMemoryVT() != MemVT)
13140
0
        break;
13141
264k
13142
264k
      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
13143
264k
      // If this is not the first ptr that we check.
13144
264k
      int64_t LdOffset = 0;
13145
264k
      if (LdBasePtr.getBase().getNode()) {
13146
168k
        // The base ptr must be the same.
13147
168k
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13148
0
          break;
13149
96.5k
      } else {
13150
96.5k
        // Check that all other base pointers are the same as this one.
13151
96.5k
        LdBasePtr = LdPtr;
13152
96.5k
      }
13153
264k
13154
264k
      // We found a potential memory operand to merge.
13155
264k
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13156
264k
    }
13157
246k
13158
246k
    if (LoadNodes.size() < 2) {
13159
150k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13160
150k
      continue;
13161
150k
    }
13162
96.0k
13163
96.0k
    // If we have load/store pair instructions and we only have two values,
13164
96.0k
    // don't bother merging.
13165
96.0k
    unsigned RequiredAlignment;
13166
96.0k
    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13167
96.0k
        StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13168
77.5k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13169
77.5k
      continue;
13170
77.5k
    }
13171
18.5k
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13172
18.5k
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13173
18.5k
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
13174
18.5k
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13175
18.5k
    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13176
18.5k
    unsigned FirstLoadAlign = FirstLoad->getAlignment();
13177
18.5k
13178
18.5k
    // Scan the memory operations on the chain and find the first
13179
18.5k
    // non-consecutive load memory address. These variables hold the index in
13180
18.5k
    // the store node array.
13181
18.5k
    unsigned LastConsecutiveLoad = 1;
13182
18.5k
    // This variable refers to the size and not index in the array.
13183
18.5k
    unsigned LastLegalVectorType = 1;
13184
18.5k
    unsigned LastLegalIntegerType = 1;
13185
18.5k
    bool isDereferenceable = true;
13186
18.5k
    bool DoIntegerTruncate = false;
13187
18.5k
    StartAddress = LoadNodes[0].OffsetFromBase;
13188
18.5k
    SDValue FirstChain = FirstLoad->getChain();
13189
99.8k
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13190
84.2k
      // All loads must share the same chain.
13191
84.2k
      if (LoadNodes[i].MemNode->getChain() != FirstChain)
13192
1.89k
        break;
13193
82.3k
13194
82.3k
      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13195
82.3k
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13196
913
        break;
13197
81.3k
      LastConsecutiveLoad = i;
13198
81.3k
13199
81.3k
      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13200
13.9k
        isDereferenceable = false;
13201
81.3k
13202
81.3k
      // Find a legal type for the vector store.
13203
81.3k
      unsigned Elts = (i + 1) * NumMemElts;
13204
81.3k
      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13205
81.3k
13206
81.3k
      bool IsFastSt, IsFastLd;
13207
81.3k
      if (TLI.isTypeLegal(StoreTy) &&
13208
8.13k
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13209
6.00k
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13210
6.00k
                                 FirstStoreAlign, &IsFastSt) &&
13211
5.97k
          IsFastSt &&
13212
3.02k
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13213
3.02k
                                 FirstLoadAlign, &IsFastLd) &&
13214
81.3k
          IsFastLd) {
13215
3.00k
        LastLegalVectorType = i + 1;
13216
3.00k
      }
13217
81.3k
13218
81.3k
      // Find a legal type for the integer store.
13219
81.3k
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13220
81.3k
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13221
81.3k
      if (TLI.isTypeLegal(StoreTy) &&
13222
7.00k
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13223
6.99k
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13224
6.99k
                                 FirstStoreAlign, &IsFastSt) &&
13225
92
          IsFastSt &&
13226
92
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13227
92
                                 FirstLoadAlign, &IsFastLd) &&
13228
81.3k
          IsFastLd) {
13229
92
        LastLegalIntegerType = i + 1;
13230
92
        DoIntegerTruncate = false;
13231
92
        // Or check whether a truncstore and extload is legal.
13232
81.3k
      } else if (TLI.getTypeAction(Context, StoreTy) ==
13233
81.3k
                 TargetLowering::TypePromoteInteger) {
13234
54.8k
        EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13235
54.8k
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13236
736
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13237
736
            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13238
736
                               StoreTy) &&
13239
736
            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13240
736
                               StoreTy) &&
13241
736
            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13242
736
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13243
736
                                   FirstStoreAlign, &IsFastSt) &&
13244
24
            IsFastSt &&
13245
24
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13246
24
                                   FirstLoadAlign, &IsFastLd) &&
13247
54.8k
            IsFastLd) {
13248
24
          LastLegalIntegerType = i + 1;
13249
24
          DoIntegerTruncate = true;
13250
24
        }
13251
81.3k
      }
13252
84.2k
    }
13253
18.5k
13254
18.5k
    // Only use vector types if the vector type is larger than the integer type.
13255
18.5k
    // If they are the same, use integers.
13256
2.98k
    bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13257
18.5k
    unsigned LastLegalType =
13258
18.5k
        std::max(LastLegalVectorType, LastLegalIntegerType);
13259
18.5k
13260
18.5k
    // We add +1 here because the LastXXX variables refer to location while
13261
18.5k
    // the NumElem refers to array/index size.
13262
18.5k
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13263
18.5k
    NumElem = std::min(LastLegalType, NumElem);
13264
18.5k
13265
18.5k
    if (NumElem < 2) {
13266
15.4k
      // We know that candidate stores are in order and of correct
13267
15.4k
      // shape. While there is no mergeable sequence from the
13268
15.4k
      // beginning one may start later in the sequence. The only
13269
15.4k
      // reason a merge of size N could have failed where another of
13270
15.4k
      // the same size would not have is if the alignment or either
13271
15.4k
      // the load or store has improved. Drop as many candidates as we
13272
15.4k
      // can here.
13273
15.4k
      unsigned NumSkip = 1;
13274
97.4k
      while ((NumSkip < LoadNodes.size()) &&
13275
82.7k
             (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
13276
82.3k
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13277
82.0k
        NumSkip++;
13278
15.4k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13279
15.4k
      continue;
13280
15.4k
    }
13281
3.07k
13282
3.07k
    // Find if it is better to use vectors or integers to load and store
13283
3.07k
    // to memory.
13284
3.07k
    EVT JointMemOpVT;
13285
3.07k
    if (UseVectorTy) {
13286
2.98k
      // Find a legal type for the vector store.
13287
2.98k
      unsigned Elts = NumElem * NumMemElts;
13288
2.98k
      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13289
3.07k
    } else {
13290
92
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13291
92
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13292
92
    }
13293
3.07k
13294
3.07k
    SDLoc LoadDL(LoadNodes[0].MemNode);
13295
3.07k
    SDLoc StoreDL(StoreNodes[0].MemNode);
13296
3.07k
13297
3.07k
    // The merged loads are required to have the same incoming chain, so
13298
3.07k
    // using the first's chain is acceptable.
13299
3.07k
13300
3.07k
    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13301
3.07k
    AddToWorklist(NewStoreChain.getNode());
13302
3.07k
13303
3.07k
    MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13304
103
                                          MachineMemOperand::MODereferenceable:
13305
2.97k
                                          MachineMemOperand::MONone;
13306
3.07k
13307
3.07k
    SDValue NewLoad, NewStore;
13308
3.07k
    if (UseVectorTy || !DoIntegerTruncate) {
13309
3.06k
      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13310
3.06k
                            FirstLoad->getBasePtr(),
13311
3.06k
                            FirstLoad->getPointerInfo(), FirstLoadAlign,
13312
3.06k
                            MMOFlags);
13313
3.06k
      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13314
3.06k
                              FirstInChain->getBasePtr(),
13315
3.06k
                              FirstInChain->getPointerInfo(), FirstStoreAlign);
13316
3.07k
    } else { // This must be the truncstore/extload case
13317
16
      EVT ExtendedTy =
13318
16
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13319
16
      NewLoad =
13320
16
          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13321
16
                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13322
16
                         JointMemOpVT, FirstLoadAlign, MMOFlags);
13323
16
      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13324
16
                                   FirstInChain->getBasePtr(),
13325
16
                                   FirstInChain->getPointerInfo(), JointMemOpVT,
13326
16
                                   FirstInChain->getAlignment(),
13327
16
                                   FirstInChain->getMemOperand()->getFlags());
13328
16
    }
13329
3.07k
13330
3.07k
    // Transfer chain users from old loads to the new load.
13331
9.29k
    for (unsigned i = 0; i < NumElem; ++i) {
13332
6.21k
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13333
6.21k
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13334
6.21k
                                    SDValue(NewLoad.getNode(), 1));
13335
6.21k
    }
13336
3.07k
13337
3.07k
    // Replace all of the stores with the new store. Recursively remove the
13338
3.07k
    // corresponding value if it is no longer used.
13339
9.29k
    for (unsigned i = 0; i < NumElem; ++i) {
13340
6.21k
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
13341
6.21k
      CombineTo(StoreNodes[i].MemNode, NewStore);
13342
6.21k
      if (Val.getNode()->use_empty())
13343
6.21k
        recursivelyDeleteUnusedNodes(Val.getNode());
13344
6.21k
    }
13345
1.97M
13346
1.97M
    RV = true;
13347
1.97M
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13348
1.97M
  }
13349
848k
  return RV;
13350
5.68M
}
13351
13352
941k
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13353
941k
  SDLoc SL(ST);
13354
941k
  SDValue ReplStore;
13355
941k
13356
941k
  // Replace the chain to avoid dependency.
13357
941k
  if (ST->isTruncatingStore()) {
13358
5.12k
    ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13359
5.12k
                                  ST->getBasePtr(), ST->getMemoryVT(),
13360
5.12k
                                  ST->getMemOperand());
13361
941k
  } else {
13362
936k
    ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13363
936k
                             ST->getMemOperand());
13364
936k
  }
13365
941k
13366
941k
  // Create token to keep both nodes around.
13367
941k
  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13368
941k
                              MVT::Other, ST->getChain(), ReplStore);
13369
941k
13370
941k
  // Make sure the new and old chains are cleaned up.
13371
941k
  AddToWorklist(Token.getNode());
13372
941k
13373
941k
  // Don't add users to work list.
13374
941k
  return CombineTo(ST, Token, false);
13375
941k
}
13376
13377
8.46k
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13378
8.46k
  SDValue Value = ST->getValue();
13379
8.46k
  if (Value.getOpcode() == ISD::TargetConstantFP)
13380
0
    return SDValue();
13381
8.46k
13382
8.46k
  SDLoc DL(ST);
13383
8.46k
13384
8.46k
  SDValue Chain = ST->getChain();
13385
8.46k
  SDValue Ptr = ST->getBasePtr();
13386
8.46k
13387
8.46k
  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13388
8.46k
13389
8.46k
  // NOTE: If the original store is volatile, this transform must not increase
13390
8.46k
  // the number of stores.  For example, on x86-32 an f64 can be stored in one
13391
8.46k
  // processor operation but an i64 (which is not legal) requires two.  So the
13392
8.46k
  // transform should not be done in this case.
13393
8.46k
13394
8.46k
  SDValue Tmp;
13395
8.46k
  switch (CFP->getSimpleValueType(0).SimpleTy) {
13396
0
  default:
13397
0
    llvm_unreachable("Unknown FP type");
13398
250
  case MVT::f16:    // We don't do this for these yet.
13399
250
  case MVT::f80:
13400
250
  case MVT::f128:
13401
250
  case MVT::ppcf128:
13402
250
    return SDValue();
13403
1.81k
  case MVT::f32:
13404
1.81k
    if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13405
1.81k
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13406
1.81k
      ;
13407
1.81k
      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13408
1.81k
                            bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13409
1.81k
                            MVT::i32);
13410
1.81k
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13411
1.81k
    }
13412
0
13413
0
    return SDValue();
13414
6.40k
  case MVT::f64:
13415
6.40k
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13416
6.06k
         !ST->isVolatile()) ||
13417
6.40k
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13418
6.06k
      ;
13419
6.06k
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13420
6.06k
                            getZExtValue(), SDLoc(CFP), MVT::i64);
13421
6.06k
      return DAG.getStore(Chain, DL, Tmp,
13422
6.06k
                          Ptr, ST->getMemOperand());
13423
6.06k
    }
13424
340
13425
340
    if (!ST->isVolatile() &&
13426
340
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13427
328
      // Many FP stores are not made apparent until after legalize, e.g. for
13428
328
      // argument passing.  Since this is so common, custom legalize the
13429
328
      // 64-bit integer store into two 32-bit stores.
13430
328
      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13431
328
      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13432
328
      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13433
328
      if (DAG.getDataLayout().isBigEndian())
13434
50
        std::swap(Lo, Hi);
13435
328
13436
328
      unsigned Alignment = ST->getAlignment();
13437
328
      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13438
328
      AAMDNodes AAInfo = ST->getAAInfo();
13439
328
13440
328
      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13441
328
                                 ST->getAlignment(), MMOFlags, AAInfo);
13442
328
      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13443
328
                        DAG.getConstant(4, DL, Ptr.getValueType()));
13444
328
      Alignment = MinAlign(Alignment, 4U);
13445
328
      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13446
328
                                 ST->getPointerInfo().getWithOffset(4),
13447
328
                                 Alignment, MMOFlags, AAInfo);
13448
328
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13449
328
                         St0, St1);
13450
328
    }
13451
12
13452
12
    return SDValue();
13453
8.46k
  }
13454
8.46k
}
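The transform above rewrites an FP constant store as an integer store of the same bit pattern; a minimal stand-alone sketch of that reinterpretation in plain C++ (memcpy is the portable spelling):

#include <cstdint>
#include <cstring>

// Returns the IEEE-754 bit pattern of F as a 32-bit integer, e.g.
// floatBits(1.0f) == 0x3F800000, which is what the combined store writes.
uint32_t floatBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits;
}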
13455
13456
8.90M
SDValue DAGCombiner::visitSTORE(SDNode *N) {
13457
8.90M
  StoreSDNode *ST  = cast<StoreSDNode>(N);
13458
8.90M
  SDValue Chain = ST->getChain();
13459
8.90M
  SDValue Value = ST->getValue();
13460
8.90M
  SDValue Ptr   = ST->getBasePtr();
13461
8.90M
13462
8.90M
  // If this is a store of a bit convert, store the input value if the
13463
8.90M
  // resultant store does not need a higher alignment than the original.
13464
8.90M
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13465
8.90M
      ST->isUnindexed()) {
13466
170k
    EVT SVT = Value.getOperand(0).getValueType();
13467
170k
    if (((!LegalOperations && !ST->isVolatile()) ||
13468
162k
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13469
170k
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13470
2.71k
      unsigned OrigAlign = ST->getAlignment();
13471
2.71k
      bool Fast = false;
13472
2.71k
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13473
2.71k
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
13474
2.71k
          Fast) {
13475
2.35k
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13476
2.35k
                            ST->getPointerInfo(), OrigAlign,
13477
2.35k
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
13478
2.35k
      }
13479
8.90M
    }
13480
170k
  }
13481
8.90M
13482
8.90M
  // Turn 'store undef, Ptr' -> nothing.
13483
8.90M
  if (Value.isUndef() && ST->isUnindexed())
13484
2.09k
    return Chain;
13485
8.90M
13486
8.90M
  // Try to infer better alignment information than the store already has.
13487
8.90M
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13488
8.87M
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13489
2.16M
      if (Align > ST->getAlignment()) {
13490
29.7k
        SDValue NewStore =
13491
29.7k
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13492
29.7k
                              ST->getMemoryVT(), Align,
13493
29.7k
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
13494
29.7k
        if (NewStore.getNode() != N)
13495
0
          return CombineTo(ST, NewStore, true);
13496
8.90M
      }
13497
2.16M
    }
13498
8.87M
  }
13499
8.90M
13500
8.90M
  // Try transforming a pair floating point load / store ops to integer
13501
8.90M
  // load / store ops.
13502
8.90M
  if (SDValue NewST = TransformFPLoadStorePair(N))
13503
3
    return NewST;
13504
8.90M
13505
8.90M
  if (ST->isUnindexed()) {
13506
8.88M
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
13507
8.88M
    // adjacent stores.
13508
8.88M
    if (findBetterNeighborChains(ST)) {
13509
445k
      // replaceStoreChain uses CombineTo, which handled all of the worklist
13510
445k
      // manipulation. Return the original node to not do anything else.
13511
445k
      return SDValue(ST, 0);
13512
445k
    }
13513
8.44M
    Chain = ST->getChain();
13514
8.44M
  }
13515
8.90M
13516
8.90M
  // FIXME: is there such a thing as a truncating indexed store?
13517
8.45M
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
13518
8.45M
      Value.getValueType().isInteger()) {
13519
957k
    // See if we can simplify the input to this truncstore with knowledge that
13520
957k
    // only the low bits are being used.  For example:
13521
957k
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
13522
957k
    SDValue Shorter = DAG.GetDemandedBits(
13523
957k
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13524
957k
                                    ST->getMemoryVT().getScalarSizeInBits()));
13525
957k
    AddToWorklist(Value.getNode());
13526
957k
    if (Shorter.getNode())
13527
2.47k
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13528
2.47k
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());
13529
954k
13530
954k
    // Otherwise, see if we can simplify the operation with
13531
954k
    // SimplifyDemandedBits, which only works if the value has a single use.
13532
954k
    if (SimplifyDemandedBits(
13533
954k
            Value,
13534
954k
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13535
954k
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
13536
16.8k
      // Re-visit the store if anything changed and the store hasn't been merged
13537
16.8k
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
13538
16.8k
      // node back to the worklist if necessary, but we also need to re-visit
13539
16.8k
      // the Store node itself.
13540
16.8k
      if (N->getOpcode() != ISD::DELETED_NODE)
13541
16.8k
        AddToWorklist(N);
13542
16.8k
      return SDValue(N, 0);
13543
16.8k
    }
13544
8.43M
  }
13545
8.43M
13546
8.43M
  // If this is a load followed by a store to the same location, then the store
13547
8.43M
  // is dead/noop.
13548
8.43M
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13549
1.09M
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13550
1.09M
        ST->isUnindexed() && !ST->isVolatile() &&
13551
1.09M
        // There can't be any side effects between the load and store, such as
13552
1.09M
        // a call or store.
13553
1.09M
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13554
239
      // The store is dead, remove it.
13555
239
      return Chain;
13556
239
    }
13557
8.43M
  }
13558
8.43M
13559
8.43M
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
13560
252k
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13561
252k
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13562
252k
        ST->getMemoryVT() == ST1->getMemoryVT()) {
13563
1.70k
      // If this is a store followed by a store with the same value to the same
13564
1.70k
      // location, then the store is dead/noop.
13565
1.70k
      if (ST1->getValue() == Value) {
13566
303
        // The store is dead, remove it.
13567
303
        return Chain;
13568
303
      }
13569
1.40k
13570
1.40k
      // If this is a store whose preceding store is to the same location
13571
1.40k
      // and no other node is chained to that store, we can effectively
13572
1.40k
      // drop the store. Do not remove stores to undef as they may be used as
13573
1.40k
      // data sinks.
13574
1.40k
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13575
1.40k
          !ST1->getBasePtr().isUndef()) {
13576
424
        // ST1 is fully overwritten and can be elided. Combine with its chain
13577
424
        // value.
13578
424
        CombineTo(ST1, ST1->getChain());
13579
424
        return SDValue();
13580
424
      }
13581
8.43M
    }
13582
252k
  }
13583
8.43M
13584
8.43M
  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13585
8.43M
  // truncating store.  We can do this even if this is already a truncstore.
13586
8.43M
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13587
8.43M
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13588
69.3k
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13589
8.43M
                            ST->getMemoryVT())) {
13590
51.1k
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13591
51.1k
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
13592
51.1k
  }
13593
8.38M
13594
8.38M
  // Always perform this optimization before types are legal. If the target
13595
8.38M
  // prefers, also try this after legalization to catch stores that were created
13596
8.38M
  // by intrinsics or other nodes.
13597
8.38M
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
13598
5.68M
    while (true) {
13599
5.68M
      // There can be multiple store sequences on the same chain.
13600
5.68M
      // Keep trying to merge store sequences until we are unable to do so
13601
5.68M
      // or until we merge the last store on the chain.
13602
5.68M
      bool Changed = MergeConsecutiveStores(ST);
13603
5.68M
      if (!Changed) break;
13604
74.7k
      // Return N as merge only uses CombineTo and no worklist clean
13605
74.7k
      // up is necessary.
13606
74.7k
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13607
61.0k
        return SDValue(N, 0);
13608
5.68M
    }
13609
5.67M
  }
13610
8.38M
13611
8.38M
  // Try transforming N to an indexed store.
13612
8.32M
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13613
23.5k
    return SDValue(N, 0);
13614
8.30M
13615
8.30M
  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
13616
8.30M
  //
13617
8.30M
  // Make sure to do this only after attempting to merge stores in order to
13618
8.30M
  //  avoid changing the types of some subset of stores due to visit order,
13619
8.30M
  //  preventing their merging.
13620
8.30M
  if (isa<ConstantFPSDNode>(ST->getValue())) {
13621
8.46k
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13622
8.20k
      return NewSt;
13623
8.29M
  }
13624
8.29M
13625
8.29M
  if (SDValue NewSt = splitMergedValStore(ST))
13626
0
    return NewSt;
13627
8.29M
13628
8.29M
  return ReduceLoadOpStoreWidth(N);
13629
8.29M
}
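One of the simpler cases visitSTORE handles is the load-then-store-back of the same value; a minimal C++-level sketch of that pattern (hypothetical function, not LLVM code):

// With no intervening side effects on the chain, the store below writes back
// the value that was just loaded from the same address, so the combine can
// drop it as dead.
void roundTrip(int *P) {
  int V = *P;  // load
  *P = V;      // store of the same value to the same location: removable
}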
13630
13631
/// For the instruction sequence of store below, F and I values
13632
/// are bundled together as an i64 value before being stored into memory.
13633
/// Sometimes it is more efficient to generate separate stores for F and I,
13634
/// which can remove the bitwise instructions or sink them to colder places.
13635
///
13636
///   (store (or (zext (bitcast F to i32) to i64),
13637
///              (shl (zext I to i64), 32)), addr)  -->
13638
///   (store F, addr) and (store I, addr+4)
13639
///
13640
/// Similarly, splitting for other merged store can also be beneficial, like:
13641
/// For pair of {i32, i32}, i64 store --> two i32 stores.
13642
/// For pair of {i32, i16}, i64 store --> two i32 stores.
13643
/// For pair of {i16, i16}, i32 store --> two i16 stores.
13644
/// For pair of {i16, i8},  i32 store --> two i16 stores.
13645
/// For pair of {i8, i8},   i16 store --> two i8 stores.
13646
///
13647
/// We allow each target to determine specifically which kind of splitting is
13648
/// supported.
13649
///
13650
/// The store patterns are commonly seen from the simple code snippet below
13651
/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
13652
///   void goo(const std::pair<int, float> &);
13653
///   hoo() {
13654
///     ...
13655
///     goo(std::make_pair(tmp, ftmp));
13656
///     ...
13657
///   }
13658
///
13659
8.29M
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13660
8.29M
  if (OptLevel == CodeGenOpt::None)
13661
12.1k
    return SDValue();
13662
8.27M
13663
8.27M
  SDValue Val = ST->getValue();
13664
8.27M
  SDLoc DL(ST);
13665
8.27M
13666
8.27M
  // Match OR operand.
13667
8.27M
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13668
8.24M
    return SDValue();
13669
32.4k
13670
32.4k
  // Match SHL operand and get Lower and Higher parts of Val.
13671
32.4k
  SDValue Op1 = Val.getOperand(0);
13672
32.4k
  SDValue Op2 = Val.getOperand(1);
13673
32.4k
  SDValue Lo, Hi;
13674
32.4k
  if (Op1.getOpcode() != ISD::SHL) {
13675
28.6k
    std::swap(Op1, Op2);
13676
28.6k
    if (Op1.getOpcode() != ISD::SHL)
13677
24.4k
      return SDValue();
13678
7.99k
  }
13679
7.99k
  Lo = Op2;
13680
7.99k
  Hi = Op1.getOperand(0);
13681
7.99k
  if (!Op1.hasOneUse())
13682
250
    return SDValue();
13683
7.74k
13684
7.74k
  // Match shift amount to HalfValBitSize.
13685
7.74k
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13686
7.74k
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13687
7.74k
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13688
5.95k
    return SDValue();
13689
1.79k
13690
1.79k
  // Lo and Hi are zero-extended from int with size less equal than 32
13691
1.79k
  // to i64.
13692
1.79k
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13693
645
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
13694
645
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13695
1.79k
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13696
145
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
13697
145
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13698
1.65k
    return SDValue();
13699
145
13700
145
  // Use the EVT of low and high parts before bitcast as the input
13701
145
  // of target query.
13702
145
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13703
0
                  ? Lo.getOperand(0).getValueType()
13704
145
                  : Lo.getValueType();
13705
145
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13706
0
                   ? Hi.getOperand(0).getValueType()
13707
145
                   : Hi.getValueType();
13708
145
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13709
145
    return SDValue();
13710
0
13711
0
  // Start to split store.
13712
0
  unsigned Alignment = ST->getAlignment();
13713
0
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13714
0
  AAMDNodes AAInfo = ST->getAAInfo();
13715
0
13716
0
  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13717
0
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13718
0
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13719
0
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13720
0
13721
0
  SDValue Chain = ST->getChain();
13722
0
  SDValue Ptr = ST->getBasePtr();
13723
0
  // Lower value store.
13724
0
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13725
0
                             ST->getAlignment(), MMOFlags, AAInfo);
13726
0
  Ptr =
13727
0
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13728
0
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13729
0
  // Higher value store.
13730
0
  SDValue St1 =
13731
0
      DAG.getStore(St0, DL, Hi, Ptr,
13732
0
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13733
0
                   Alignment / 2, MMOFlags, AAInfo);
13734
0
  return St1;
13735
0
}
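A minimal sketch (hypothetical helper, not LLVM code) of the i64 "bundle" store pattern that splitMergedValStore recognizes; when the target reports that two narrow stores are cheaper, the single 64-bit store below is split back into its 32-bit halves:

#include <cstdint>
#include <cstring>

void storePair(void *Addr, float F, uint32_t I) {
  uint32_t FBits;
  std::memcpy(&FBits, &F, sizeof(FBits));          // bitcast F to i32
  uint64_t Bundled = uint64_t(FBits) | (uint64_t(I) << 32);
  std::memcpy(Addr, &Bundled, sizeof(Bundled));    // one i64 store of the pair
}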
13736
13737
60.5k
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13738
60.5k
  SDValue InVec = N->getOperand(0);
13739
60.5k
  SDValue InVal = N->getOperand(1);
13740
60.5k
  SDValue EltNo = N->getOperand(2);
13741
60.5k
  SDLoc DL(N);
13742
60.5k
13743
60.5k
  // If the inserted element is an UNDEF, just use the input vector.
13744
60.5k
  if (InVal.isUndef())
13745
252
    return InVec;
13746
60.2k
13747
60.2k
  EVT VT = InVec.getValueType();
13748
60.2k
13749
60.2k
  // Remove redundant insertions:
13750
60.2k
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
13751
60.2k
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13752
60.2k
      
InVec == InVal.getOperand(0)17.8k
&&
EltNo == InVal.getOperand(1)27
)
13753
7
    return InVec;
13754
60.2k
13755
60.2k
  // Check that we know which element is being inserted
13756
60.2k
  if (!isa<ConstantSDNode>(EltNo))
13757
570
    return SDValue();
13758
59.7k
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13759
59.7k
13760
59.7k
  // Canonicalize insert_vector_elt dag nodes.
13761
59.7k
  // Example:
13762
59.7k
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13763
59.7k
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13764
59.7k
  //
13765
59.7k
  // Do this only if the child insert_vector node has one use; also
13766
59.7k
  // do this only if indices are both constants and Idx1 < Idx0.
13767
59.7k
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13768
59.7k
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
13769
13.1k
    unsigned OtherElt = InVec.getConstantOperandVal(2);
13770
13.1k
    if (Elt < OtherElt) {
13771
206
      // Swap nodes.
13772
206
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13773
206
                                  InVec.getOperand(0), InVal, EltNo);
13774
206
      AddToWorklist(NewOp.getNode());
13775
206
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13776
206
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13777
206
    }
13778
59.4k
  }
13779
59.4k
13780
59.4k
  // If we can't generate a legal BUILD_VECTOR, exit
13781
59.4k
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13782
24.3k
    return SDValue();
13783
35.1k
13784
35.1k
  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13785
35.1k
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13786
35.1k
  // vector elements.
13787
35.1k
  SmallVector<SDValue, 8> Ops;
13788
35.1k
  // Do not combine these two vectors if the output vector will not replace
13789
35.1k
  // the input vector.
13790
35.1k
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13791
12.0k
    Ops.append(InVec.getNode()->op_begin(),
13792
12.0k
               InVec.getNode()->op_end());
13793
35.1k
  } else if (InVec.isUndef()) {
13794
9.57k
    unsigned NElts = VT.getVectorNumElements();
13795
9.57k
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13796
23.0k
  } else {
13797
13.5k
    return SDValue();
13798
13.5k
  }
13799
21.6k
13800
21.6k
  // Insert the element
13801
21.6k
  
if (21.6k
Elt < Ops.size()21.6k
) {
13802
21.6k
    // All the operands of BUILD_VECTOR must have the same type;
13803
21.6k
    // we enforce that here.
13804
21.6k
    EVT OpVT = Ops[0].getValueType();
13805
21.6k
    Ops[Elt] = OpVT.isInteger() ? 
DAG.getAnyExtOrTrunc(InVal, DL, OpVT)12.2k
:
InVal9.33k
;
13806
21.6k
  }
13807
60.5k
13808
60.5k
  // Return the new vector
13809
60.5k
  return DAG.getBuildVector(VT, DL, Ops);
13810
60.5k
}
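
// Editorial sketch (not part of DAGCombiner.cpp): the BUILD_VECTOR folding in
// visitINSERT_VECTOR_ELT above boils down to "copy the existing lanes, then
// overwrite one slot if the index is in range". Modeled here on plain ints,
// with -1 standing in for undef; names and types are illustrative only.
#include <cstddef>
#include <vector>

std::vector<int> foldInsertIntoBuildVector(std::vector<int> Lanes,
                                           std::size_t Elt, int Val) {
  // Mirrors Ops[Elt] = InVal when the insert index is in range; out-of-range
  // indices leave the lanes untouched, just as the combine keeps Ops as-is.
  if (Elt < Lanes.size())
    Lanes[Elt] = Val;
  return Lanes;
}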

SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
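
// Editorial sketch (not part of DAGCombiner.cpp): the narrowed load above
// addresses element Elt at BasePtr + Elt * sizeof(element). A constant index
// folds to an immediate byte offset (VecEltVT.getSizeInBits() * Elt / 8),
// while a variable index becomes an explicit multiply by the element's store
// size. The constant-index arithmetic, reproduced with illustrative names:
#include <cstdint>

std::uint64_t elementByteOffset(std::uint64_t EltSizeInBits,
                                std::uint64_t Elt) {
  // Same arithmetic as the constant-index path of the combine.
  return EltSizeInBits * Elt / 8;
}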

SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}
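
// Editorial sketch (not part of DAGCombiner.cpp): when a narrow value is
// zero- or any-extended into a wider lane, it occupies the lowest narrow
// sub-lane on little-endian targets and the highest one on big-endian
// targets, which is exactly the Index formula used above. Reproduced on its
// own with illustrative names:
unsigned narrowLaneIndex(unsigned i, unsigned ElemRatio, bool IsLittleEndian) {
  // e.g. ElemRatio = 4 for i8 -> i32: lanes 0, 4, 8, ... on little-endian,
  // lanes 3, 7, 11, ... on big-endian.
  return IsLittleEndian ? i * ElemRatio : i * ElemRatio + (ElemRatio - 1);
}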

SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);

  EVT SrcVT = MVT::Other;
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Just because the floating-point vector type is legal does not necessarily
  // mean that the corresponding integer vector type is.
  if (!isTypeLegal(NVT))
    return SDValue();

  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.isUndef())
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
  AddToWorklist(BV.getNode());

  return DAG.getNode(Opcode, DL, VT, BV);
}

SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
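
// Editorial sketch (not part of DAGCombiner.cpp): a two-input shuffle mask
// indexes the concatenation of its inputs, so lanes taken from the second
// vector are biased by Vec2Offset, as in the mask loop above. Stand-alone
// version with illustrative names; -1 marks an undef mask entry, and
// FromVec1[i] / FromVec2[i] hold the extract index for lane i or -1 when the
// lane does not come from that input.
#include <vector>

std::vector<int> buildTwoInputMask(const std::vector<int> &FromVec1,
                                   const std::vector<int> &FromVec2,
                                   unsigned Vec2Offset) {
  std::vector<int> Mask(FromVec1.size(), -1);
  for (unsigned i = 0; i != Mask.size(); ++i) {
    if (FromVec1[i] >= 0)
      Mask[i] = FromVec1[i];                    // lane comes from input 1
    else if (FromVec2[i] >= 0)
      Mask[i] = int(Vec2Offset) + FromVec2[i];  // lane comes from input 2
  }
  return Mask;
}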

// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with a constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}

// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations which can be matched to a truncate or to a shuffle-truncate.
SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
  // TODO: Add support for big-endian.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();
  if (N->getNumOperands() < 2)
    return SDValue();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumElems = N->getNumOperands();

  if (!isTypeLegal(VT))
    return SDValue();

  // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
  // index, bail out.
  // TODO: Allow undef elements in some cases?
  if (llvm::any_of(N->ops(), [VT](SDValue Op) {
        return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
               !isa<ConstantSDNode>(Op.getOperand(1)) ||
               Op.getValueType() != VT.getVectorElementType();
      }))
    return SDValue();

  // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
  auto GetExtractIdx = [](SDValue Extract) {
    return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
  };

  // The offset is defined to be the BUILD_VECTOR's first operand (assuming no
  // undef and little-endian).
  int Offset = GetExtractIdx(N->getOperand(0));

  // Compute the stride from the next operand.
  int Stride = GetExtractIdx(N->getOperand(1)) - Offset;
  SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);

  // Proceed only if the stride and the types can be matched to a truncate.
  if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
      (ExtractedFromVec.getValueType().getVectorNumElements() !=
       Stride * NumElems) ||
      (VT.getScalarSizeInBits() * Stride > 64))
    return SDValue();

  // Check remaining operands are consistent with the computed stride.
  for (unsigned i = 1; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if ((Op.getOperand(0) != ExtractedFromVec) ||
        (GetExtractIdx(Op) != Stride * i + Offset))
      return SDValue();
  }

  SDValue Res = ExtractedFromVec;
  EVT TruncVT =
      VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
  if (Offset) {
    // If the first index is non-zero, need to shuffle elements of interest to
    // lower parts of the vector's elements the truncate will act upon.
    // TODO: Generalize to compute the permute-shuffle that will prepare any
    // element permutation for the truncate, and let the target decide if
    // profitable.
    EVT ExtractedVT = ExtractedFromVec.getValueType();
    SmallVector<int, 64> Mask;
    for (unsigned i = 0; i != NumElems; ++i) {
      Mask.push_back(Offset + i * Stride);
      // Pad the elements that will be lost after the truncate with undefs.
      Mask.append(Stride - 1, -1);
    }
    if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) ||
        !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT,
                                                              TruncVT))
      return SDValue();
    Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res,
                               DAG.getUNDEF(ExtractedVT), Mask);
  }
  // Construct the truncate.
  LLVMContext &Ctx = *DAG.getContext();
  EVT NewVT = VT.getVectorVT(
      Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);

  Res = DAG.getBitcast(NewVT, Res);
  Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
  return DAG.getBitcast(VT, Res);
}
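
// Editorial sketch (not part of DAGCombiner.cpp): the stride check above asks
// whether the extract indices form the arithmetic sequence
// Offset, Offset + Stride, Offset + 2*Stride, ... Reproduced here on a plain
// index vector; names are illustrative.
#include <cstddef>
#include <vector>

bool matchesStridedExtract(const std::vector<long> &Indices, long Offset,
                           long Stride) {
  for (std::size_t i = 0; i != Indices.size(); ++i)
    if (Indices[i] != Offset + long(i) * Stride)
      return false;
  return true;
}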

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  if (TLI.isDesirableToCombineBuildVectorToTruncate())
    if (SDValue V = reduceBuildVecToTrunc(N))
      return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}

static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
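
// Editorial sketch (not part of DAGCombiner.cpp): peeking through a bitcast
// changes the element count, so the EXTRACT_SUBVECTOR index has to be
// rescaled to the result's element granularity, as done above. Stand-alone
// version with illustrative names; returns false when the two element counts
// do not divide evenly (the combine gives up in that case).
bool scaleSubvectorIndex(int ExtIdx, int NumExtElts, int NumElts,
                         int &ScaledIdx) {
  if (NumExtElts % NumElts == 0)
    ScaledIdx = ExtIdx / (NumExtElts / NumElts);  // source has finer elements
  else if (NumElts % NumExtElts == 0)
    ScaledIdx = ExtIdx * (NumElts / NumExtElts);  // source has coarser elements
  else
    return false;
  return true;
}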
14898
14899
31.3k
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
14900
31.3k
  // If we only have one input vector, we don't need to do any concatenation.
14901
31.3k
  if (N->getNumOperands() == 1)
14902
0
    return N->getOperand(0);
14903
31.3k
14904
31.3k
  // Check if all of the operands are undefs.
14905
31.3k
  EVT VT = N->getValueType(0);
14906
31.3k
  if (ISD::allOperandsUndef(N))
14907
2
    return DAG.getUNDEF(VT);
14908
31.3k
14909
31.3k
  // Optimize concat_vectors where all but the first of the vectors are undef.
14910
31.3k
  
if (31.3k
std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) 31.3k
{
14911
38.3k
        return Op.isUndef();
14912
31.3k
      })) {
14913
9.83k
    SDValue In = N->getOperand(0);
14914
9.83k
    assert(In.getValueType().isVector() && "Must concat vectors");
14915
9.83k
14916
9.83k
    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
14917
9.83k
    if (In->getOpcode() == ISD::BITCAST &&
14918
9.83k
        
!In->getOperand(0)->getValueType(0).isVector()861
) {
14919
119
      SDValue Scalar = In->getOperand(0);
14920
119
14921
119
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
14922
119
      // look through the trunc so we can still do the transform:
14923
119
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
14924
119
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
14925
2
          !TLI.isTypeLegal(Scalar.getValueType()) &&
14926
2
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
14927
2
        Scalar = Scalar->getOperand(0);
14928
119
14929
119
      EVT SclTy = Scalar->getValueType(0);
14930
119
14931
119
      if (
!SclTy.isFloatingPoint() && 119
!SclTy.isInteger()74
)
14932
0
        return SDValue();
14933
119
14934
119
      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
14935
119
      if (VNTNumElms < 2)
14936
1
        return SDValue();
14937
118
14938
118
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
14939
118
      if (
!TLI.isTypeLegal(NVT) || 118
!TLI.isTypeLegal(Scalar.getValueType())116
)
14940
3
        return SDValue();
14941
115
14942
115
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
14943
115
      return DAG.getBitcast(VT, Res);
14944
115
    }
14945
9.83k
  }
14946
31.2k
14947
31.2k
  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
14948
31.2k
  // We have already tested above for an UNDEF only concatenation.
14949
31.2k
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
14950
31.2k
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
14951
31.2k
  
auto IsBuildVectorOrUndef = [](const SDValue &Op) 31.2k
{
14952
34.5k
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
14953
36.8k
  };
14954
31.2k
  if (
llvm::all_of(N->ops(), IsBuildVectorOrUndef)31.2k
) {
14955
3.29k
    SmallVector<SDValue, 8> Opnds;
14956
3.29k
    EVT SVT = VT.getScalarType();
14957
3.29k
14958
3.29k
    EVT MinVT = SVT;
14959
3.29k
    if (
!SVT.isFloatingPoint()3.29k
) {
14960
2.97k
      // If BUILD_VECTOR are from built from integer, they may have different
14961
2.97k
      // operand types. Get the smallest type and truncate all operands to it.
14962
2.97k
      bool FoundMinVT = false;
14963
2.97k
      for (const SDValue &Op : N->ops())
14964
7.60k
        
if (7.60k
ISD::BUILD_VECTOR == Op.getOpcode()7.60k
) {
14965
5.68k
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
14966
5.68k
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
14967
7.60k
          FoundMinVT = true;
14968
7.60k
        }
14969
2.97k
      assert(FoundMinVT && "Concat vector type mismatch");
14970
2.97k
    }
14971
3.29k
14972
8.42k
    for (const SDValue &Op : N->ops()) {
14973
8.42k
      EVT OpVT = Op.getValueType();
14974
8.42k
      unsigned NumElts = OpVT.getVectorNumElements();
14975
8.42k
14976
8.42k
      if (ISD::UNDEF == Op.getOpcode())
14977
2.20k
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));
14978
8.42k
14979
8.42k
      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14980
6.22k
        if (SVT.isFloatingPoint()) {
14981
538
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
14982
538
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
14983
6.22k
        } else {
14984
24.2k
          for (unsigned i = 0; i != NumElts; ++i)
14985
18.5k
            Opnds.push_back(
14986
18.5k
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
14987
5.68k
        }
14988
6.22k
      }
14989
8.42k
    }
14990
3.29k
14991
3.29k
    assert(VT.getVectorNumElements() == Opnds.size() &&
14992
3.29k
           "Concat vector type mismatch");
14993
3.29k
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14994
3.29k
  }
14995
27.9k
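As a rough standalone illustration of the fold just above (flattening a concat of BUILD_VECTOR/undef operands into one build_vector whose integer scalars are truncated to the smallest operand scalar type), here is a plain C++ sketch. It is not part of DAGCombiner and uses no LLVM API; the name FlatConcat and the optional-based undef encoding are invented for the example.

#include <cstdint>
#include <optional>
#include <vector>

using Scalar = std::optional<uint64_t>;   // std::nullopt models an undef element
using BuildVec = std::vector<Scalar>;

BuildVec FlatConcat(const std::vector<BuildVec> &Ops, unsigned MinBits) {
  BuildVec Flat;
  uint64_t Mask = MinBits >= 64 ? ~0ull : ((1ull << MinBits) - 1);
  for (const BuildVec &Op : Ops)
    for (Scalar S : Op)
      // Undef elements stay undef; defined elements are truncated to MinBits,
      // mirroring the ISD::TRUNCATE nodes created in the integer case above.
      Flat.push_back(S ? Scalar(*S & Mask) : std::nullopt);
  return Flat;
}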
14996
27.9k
  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
14997
27.9k
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
14998
9
    return V;
14999
27.9k
15000
27.9k
  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
15001
27.9k
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15002
10.3k
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
15003
869
      return V;
15004
27.0k
15005
27.0k
  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
15006
27.0k
  // nodes often generate nop CONCAT_VECTOR nodes.
15007
27.0k
  // Scan the CONCAT_VECTORS operands and look for a CONCAT operation that
15008
27.0k
  // place the incoming vectors at the exact same location.
15009
27.0k
  SDValue SingleSource = SDValue();
15010
27.0k
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
15011
27.0k
15012
27.2k
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15013
27.2k
    SDValue Op = N->getOperand(i);
15014
27.2k
15015
27.2k
    if (Op.isUndef())
15016
105
      continue;
15017
27.1k
15018
27.1k
    // Check if this is the identity extract:
15019
27.1k
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15020
26.4k
      return SDValue();
15021
695
15022
695
    // Find the single incoming vector for the extract_subvector.
15023
695
    if (SingleSource.getNode()) {
15024
76
      if (Op.getOperand(0) != SingleSource)
15025
69
        return SDValue();
15026
619
    } else {
15027
619
      SingleSource = Op.getOperand(0);
15028
619
15029
619
      // Check the source type is the same as the type of the result.
15030
619
      // If not, this concat may extend the vector, so we can not
15031
619
      // optimize it away.
15032
619
      if (SingleSource.getValueType() != N->getValueType(0))
15033
516
        return SDValue();
15034
110
    }
15035
110
15036
110
    unsigned IdentityIndex = i * PartNumElem;
15037
110
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15038
110
    // The extract index must be constant.
15039
110
    if (!CS)
15040
0
      return SDValue();
15041
110
15042
110
    // Check that we are reading from the identity index.
15043
110
    if (CS->getZExtValue() != IdentityIndex)
15044
1
      return SDValue();
15045
27.2k
  }
15046
27.0k
15047
11
  if (SingleSource.getNode())
15048
11
    return SingleSource;
15049
0
15050
0
  return SDValue();
15051
0
}
15052
15053
/// If we are extracting a subvector produced by a wide binary operator with
15054
/// at least one operand that was the result of a vector concatenation, then try
15055
/// to use the narrow vector operands directly to avoid the concatenation and
15056
/// extraction.
15057
244k
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
15058
244k
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
15059
244k
  // some of these bailouts with other transforms.
15060
244k
15061
244k
  // The extract index must be a constant, so we can map it to a concat operand.
15062
244k
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15063
244k
  if (!ExtractIndex)
15064
0
    return SDValue();
15065
244k
15066
244k
  // Only handle the case where we are doubling and then halving. A larger ratio
15067
244k
  // may require more than two narrow binops to replace the wide binop.
15068
244k
  EVT VT = Extract->getValueType(0);
15069
244k
  unsigned NumElems = VT.getVectorNumElements();
15070
244k
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
15071
244k
         "Extract index is not a multiple of the vector length.");
15072
244k
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
15073
27.0k
    return SDValue();
15074
217k
15075
217k
  // We are looking for an optionally bitcasted wide vector binary operator
15076
217k
  // feeding an extract subvector.
15077
217k
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
15078
217k
15079
217k
  // TODO: The motivating case for this transform is an x86 AVX1 target. That
15080
217k
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
15081
217k
  // flavors, but no other 256-bit integer support. This could be extended to
15082
217k
  // handle any binop, but that may require fixing/adding other folds to avoid
15083
217k
  // codegen regressions.
15084
217k
  unsigned BOpcode = BinOp.getOpcode();
15085
217k
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
15086
214k
    return SDValue();
15087
3.28k
15088
3.28k
  // The binop must be a vector type, so we can chop it in half.
15089
3.28k
  EVT WideBVT = BinOp.getValueType();
15090
3.28k
  if (!WideBVT.isVector())
15091
0
    return SDValue();
15092
3.28k
15093
3.28k
  // Bail out if the target does not support a narrower version of the binop.
15094
3.28k
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
15095
3.28k
                                   WideBVT.getVectorNumElements() / 2);
15096
3.28k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15097
3.28k
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
15098
12
    return SDValue();
15099
3.27k
15100
3.27k
  // Peek through bitcasts of the binary operator operands if needed.
15101
3.27k
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
15102
3.27k
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
15103
3.27k
15104
3.27k
  // We need at least one concatenation operation of a binop operand to make
15105
3.27k
  // this transform worthwhile. The concat must double the input vector sizes.
15106
3.27k
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
15107
3.27k
  bool ConcatL =
15108
83
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
15109
3.27k
  bool ConcatR =
15110
162
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
15111
3.27k
  if (!ConcatL && !ConcatR)
15112
3.07k
    return SDValue();
15113
199
15114
199
  // If one of the binop operands was not the result of a concat, we must
15115
199
  // extract a half-sized operand for our new narrow binop. We can't just reuse
15116
199
  // the original extract index operand because we may have bitcasted.
15117
199
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
15118
199
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
15119
199
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
15120
199
  SDLoc DL(Extract);
15121
199
15122
199
  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
15123
199
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
15124
199
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
15125
83
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
15126
116
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15127
116
                                    BinOp.getOperand(0),
15128
116
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15129
199
15130
162
  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
15131
37
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15132
37
                                    BinOp.getOperand(1),
15133
37
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15134
244k
15135
244k
  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
15136
244k
  return DAG.getBitcast(VT, NarrowBinOp);
15137
244k
}
15138
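The narrowing above relies on the fact that, for element-wise bitwise ops, extracting one half of the wide result equals applying the op to the corresponding narrow halves. A small standalone check of that identity in plain C++ (no LLVM API; Concat/And/Extract are invented helper names):

#include <cassert>
#include <vector>

using Vec = std::vector<unsigned>;

static Vec Concat(const Vec &A, const Vec &B) {
  Vec R(A); R.insert(R.end(), B.begin(), B.end()); return R;
}
static Vec And(const Vec &A, const Vec &B) {
  Vec R(A.size());
  for (size_t i = 0; i < A.size(); ++i) R[i] = A[i] & B[i];
  return R;
}
static Vec Extract(const Vec &V, size_t Idx, size_t Len) {
  return Vec(V.begin() + Idx, V.begin() + Idx + Len);
}

int main() {
  Vec X1{1, 2}, X2{3, 4}, Y1{7, 7}, Y2{1, 1};
  Vec Wide = And(Concat(X1, X2), Concat(Y1, Y2));
  // Extracting the upper half of the wide AND equals the narrow AND of the
  // upper concat operands, which is what the combine rewrites to.
  assert(Extract(Wide, 2, 2) == And(X2, Y2));
  return 0;
}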
15139
/// If we are extracting a subvector from a wide vector load, convert to a
15140
/// narrow load to eliminate the extraction:
15141
/// (extract_subvector (load wide vector)) --> (load narrow vector)
15142
244k
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15143
244k
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
15144
244k
  if (DAG.getDataLayout().isBigEndian())
15145
936
    return SDValue();
15146
243k
15147
243k
  // TODO: The one-use check is overly conservative. Check the cost of the
15148
243k
  // extract instead or remove that condition entirely.
15149
243k
  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15150
243k
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15151
243k
  if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15152
130
      !ExtIdx)
15153
243k
    return SDValue();
15154
130
15155
130
  // The narrow load will be offset from the base address of the old load if
15156
130
  // we are extracting from something besides index 0 (little-endian).
15157
130
  EVT VT = Extract->getValueType(0);
15158
130
  SDLoc DL(Extract);
15159
130
  SDValue BaseAddr = Ld->getOperand(1);
15160
130
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15161
130
15162
130
  // TODO: Use "BaseIndexOffset" to make this more effective.
15163
130
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15164
130
  MachineFunction &MF = DAG.getMachineFunction();
15165
130
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15166
130
                                                   VT.getStoreSize());
15167
130
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15168
130
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15169
130
  return NewLd;
15170
130
}
15171
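For the little-endian case handled above, the narrow load simply starts a whole number of scalar slots past the wide load's base address. A trivial plain C++ sketch of that offset arithmetic (illustrative name only, not LLVM API):

#include <cstdint>

uint64_t NarrowLoadByteOffset(uint64_t ExtIdx, uint64_t ScalarStoreSizeInBytes) {
  // e.g. extracting from element index 4 of a wide vector load with 4-byte
  // scalars places the narrow load 16 bytes past the original base address.
  return ExtIdx * ScalarStoreSizeInBytes;
}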
15172
248k
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
15173
248k
  EVT NVT = N->getValueType(0);
15174
248k
  SDValue V = N->getOperand(0);
15175
248k
15176
248k
  // Extract from UNDEF is UNDEF.
15177
248k
  if (V.isUndef())
15178
2
    return DAG.getUNDEF(NVT);
15179
248k
15180
248k
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
15181
244k
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
15182
130
      return NarrowLoad;
15183
248k
15184
248k
  // Combine:
15185
248k
  //    (extract_subvec (concat V1, V2, ...), i)
15186
248k
  // Into:
15187
248k
  //    Vi if possible
15188
248k
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
15189
248k
  // type.
15190
248k
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
15191
1.03k
      isa<ConstantSDNode>(N->getOperand(1)) &&
15192
248k
      V->getOperand(0).getValueType() == NVT) {
15193
670
    unsigned Idx = N->getConstantOperandVal(1);
15194
670
    unsigned NumElems = NVT.getVectorNumElements();
15195
670
    assert((Idx % NumElems) == 0 &&
15196
670
           "IDX in concat is not a multiple of the result vector length.");
15197
670
    return V->getOperand(Idx / NumElems);
15198
670
  }
15199
247k
15200
247k
  // Skip bitcasting
15201
247k
  V = peekThroughBitcast(V);
15202
247k
15203
247k
  // If the input is a build vector. Try to make a smaller build vector.
15204
247k
  if (V->getOpcode() == ISD::BUILD_VECTOR) {
15205
2.96k
    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
15206
2.96k
      EVT InVT = V->getValueType(0);
15207
2.96k
      unsigned ExtractSize = NVT.getSizeInBits();
15208
2.96k
      unsigned EltSize = InVT.getScalarSizeInBits();
15209
2.96k
      // Only do this if we won't split any elements.
15210
2.96k
      if (ExtractSize % EltSize == 0) {
15211
2.96k
        unsigned NumElems = ExtractSize / EltSize;
15212
2.96k
        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
15213
2.96k
                                         InVT.getVectorElementType(), NumElems);
15214
2.96k
        if ((!LegalOperations ||
15215
1.28k
             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
15216
2.96k
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
15217
1.80k
          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
15218
1.80k
                            EltSize;
15219
1.80k
15220
1.80k
          // Extract the pieces from the original build_vector.
15221
1.80k
          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
15222
1.80k
                                            makeArrayRef(V->op_begin() + IdxVal,
15223
1.80k
                                                         NumElems));
15224
1.80k
          return DAG.getBitcast(NVT, BuildVec);
15225
1.80k
        }
15226
245k
      }
15227
2.96k
    }
15228
2.96k
  }
15229
245k
15230
245k
  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
15231
985
    // Handle only simple case where vector being inserted and vector
15232
985
    // being extracted are of same size.
15233
985
    EVT SmallVT = V->getOperand(1).getValueType();
15234
985
    if (!NVT.bitsEq(SmallVT))
15235
14
      return SDValue();
15236
971
15237
971
    // Only handle cases where both indexes are constants.
15238
971
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
15239
971
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
15240
971
15241
971
    if (InsIdx && ExtIdx) {
15242
971
      // Combine:
15243
971
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
15244
971
      // Into:
15245
971
      //    indices are equal or bit offsets are equal => V1
15246
971
      //    otherwise => (extract_subvec V1, ExtIdx)
15247
971
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
15248
971
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
15249
778
        return DAG.getBitcast(NVT, V->getOperand(1));
15250
193
      return DAG.getNode(
15251
193
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
15252
193
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
15253
193
          N->getOperand(1));
15254
193
    }
15255
985
  }
15256
244k
15257
244k
  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
15258
199
    return NarrowBOp;
15259
244k
15260
244k
  return SDValue();
15261
244k
}
15262
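The (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) fold handled above keys on bit offsets rather than raw indices, because bitcasts may have changed the element types. A plain C++ sketch of just that check (hypothetical name, not DAGCombiner code):

#include <cstdint>

bool ExtractSeesInsertedValue(uint64_t InsIdx, uint64_t SmallScalarBits,
                              uint64_t ExtIdx, uint64_t ResultScalarBits) {
  // The fold to the inserted value V2 is only valid when both indices name
  // the same bit offset inside the wide vector.
  return InsIdx * SmallScalarBits == ExtIdx * ResultScalarBits;
}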
15263
static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
15264
101k
                                                 SDValue V, SelectionDAG &DAG) {
15265
101k
  SDLoc DL(V);
15266
101k
  EVT VT = V.getValueType();
15267
101k
15268
101k
  switch (V.getOpcode()) {
15269
95.6k
  default:
15270
95.6k
    return V;
15271
101k
15272
5.58k
  case ISD::CONCAT_VECTORS: {
15273
5.58k
    EVT OpVT = V->getOperand(0).getValueType();
15274
5.58k
    int OpSize = OpVT.getVectorNumElements();
15275
5.58k
    SmallBitVector OpUsedElements(OpSize, false);
15276
5.58k
    bool FoundSimplification = false;
15277
5.58k
    SmallVector<SDValue, 4> NewOps;
15278
5.58k
    NewOps.reserve(V->getNumOperands());
15279
18.8k
    for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
15280
13.2k
      SDValue Op = V->getOperand(i);
15281
13.2k
      bool OpUsed = false;
15282
110k
      for (int j = 0; j < OpSize; ++j)
15283
97.4k
        if (UsedElements[i * OpSize + j]) {
15284
32.9k
          OpUsedElements[j] = true;
15285
32.9k
          OpUsed = true;
15286
32.9k
        }
15287
13.2k
      NewOps.push_back(
15288
6.17k
          OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
15289
7.05k
                 : DAG.getUNDEF(OpVT));
15290
13.2k
      FoundSimplification |= Op == NewOps.back();
15291
13.2k
      OpUsedElements.reset();
15292
13.2k
    }
15293
5.58k
    if (FoundSimplification)
15294
5.58k
      V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
15295
5.58k
    return V;
15296
101k
  }
15297
101k
15298
8
  case ISD::INSERT_SUBVECTOR: {
15299
8
    SDValue BaseV = V->getOperand(0);
15300
8
    SDValue SubV = V->getOperand(1);
15301
8
    auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
15302
8
    if (!IdxN)
15303
0
      return V;
15304
8
15305
8
    int SubSize = SubV.getValueType().getVectorNumElements();
15306
8
    int Idx = IdxN->getZExtValue();
15307
8
    bool SubVectorUsed = false;
15308
8
    SmallBitVector SubUsedElements(SubSize, false);
15309
64
    for (int i = 0; i < SubSize; ++i)
15310
56
      if (UsedElements[i + Idx]) {
15311
18
        SubVectorUsed = true;
15312
18
        SubUsedElements[i] = true;
15313
18
        UsedElements[i + Idx] = false;
15314
18
      }
15315
8
15316
8
    // Now recurse on both the base and sub vectors.
15317
8
    SDValue SimplifiedSubV =
15318
8
        SubVectorUsed
15319
8
            ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
15320
0
            : DAG.getUNDEF(SubV.getValueType());
15321
8
    SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
15322
8
    if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
15323
0
      V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
15324
0
                      SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
15325
8
    return V;
15326
8
  }
15327
101k
  }
15328
101k
}
15329
15330
static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
15331
47.5k
                                       SDValue N1, SelectionDAG &DAG) {
15332
47.5k
  EVT VT = SVN->getValueType(0);
15333
47.5k
  int NumElts = VT.getVectorNumElements();
15334
47.5k
  SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
15335
47.5k
  for (int M : SVN->getMask())
15336
474k
    if (M >= 0 && M < NumElts)
15337
263k
      N0UsedElements[M] = true;
15338
211k
    else if (M >= NumElts)
15339
119k
      N1UsedElements[M - NumElts] = true;
15340
47.5k
15341
47.5k
  SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
15342
47.5k
  SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
15343
47.5k
  if (S0 == N0 && S1 == N1)
15344
47.4k
    return SDValue();
15345
83
15346
83
  return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
15347
83
}
15348
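The used-element bookkeeping above follows the usual shuffle-mask convention: entries in [0, NumElts) read operand 0, entries in [NumElts, 2*NumElts) read operand 1, and negative entries read nothing. A minimal plain C++ sketch of that bookkeeping (invented name, not LLVM API):

#include <vector>

void ComputeUsedElements(const std::vector<int> &Mask, int NumElts,
                         std::vector<bool> &N0Used, std::vector<bool> &N1Used) {
  N0Used.assign(NumElts, false);
  N1Used.assign(NumElts, false);
  for (int M : Mask) {
    if (M >= 0 && M < NumElts)
      N0Used[M] = true;            // lane reads operand 0
    else if (M >= NumElts)
      N1Used[M - NumElts] = true;  // lane reads operand 1
  }
}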
15349
static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
15350
53.1k
                                   SDValue N1, SelectionDAG &DAG) {
15351
399k
  auto isUndefElt = [](SDValue V, int Idx) {
15352
399k
    // TODO - handle more cases as required.
15353
399k
    if (V.getOpcode() == ISD::BUILD_VECTOR)
15354
104k
      return V.getOperand(Idx).isUndef();
15355
294k
    return false;
15356
294k
  };
15357
53.1k
15358
53.1k
  EVT VT = SVN->getValueType(0);
15359
53.1k
  unsigned NumElts = VT.getVectorNumElements();
15360
53.1k
15361
53.1k
  bool Changed = false;
15362
53.1k
  SmallVector<int, 8> NewMask;
15363
555k
  for (unsigned i = 0; i != NumElts; ++i) {
15364
502k
    int Idx = SVN->getMaskElt(i);
15365
502k
    if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
15366
502k
        ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
15367
49
      Changed = true;
15368
49
      Idx = -1;
15369
49
    }
15370
502k
    NewMask.push_back(Idx);
15371
502k
  }
15372
53.1k
  if (Changed)
15373
28
    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
15374
53.1k
15375
53.1k
  return SDValue();
15376
53.1k
}
15377
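The simplification above turns any mask lane that reads an undef BUILD_VECTOR element into an undef lane (-1). A rough standalone model in plain C++ (std::nullopt stands in for an undef source element; names are illustrative and the bounds test is written the straightforward way, which may differ slightly from the exact source above):

#include <optional>
#include <vector>

std::vector<int> SimplifyMask(const std::vector<int> &Mask,
                              const std::vector<std::optional<int>> &N0,
                              const std::vector<std::optional<int>> &N1) {
  int NumElts = static_cast<int>(N0.size());
  std::vector<int> NewMask;
  for (int Idx : Mask) {
    bool ReadsUndef =
        (Idx >= 0 && Idx < NumElts && !N0[Idx]) ||       // undef element of N0
        (Idx >= NumElts && !N1[Idx - NumElts]);          // undef element of N1
    NewMask.push_back(ReadsUndef ? -1 : Idx);
  }
  return NewMask;
}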
15378
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
15379
// or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
15380
2.12k
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
15381
2.12k
  EVT VT = N->getValueType(0);
15382
2.12k
  unsigned NumElts = VT.getVectorNumElements();
15383
2.12k
15384
2.12k
  SDValue N0 = N->getOperand(0);
15385
2.12k
  SDValue N1 = N->getOperand(1);
15386
2.12k
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15387
2.12k
15388
2.12k
  SmallVector<SDValue, 4> Ops;
15389
2.12k
  EVT ConcatVT = N0.getOperand(0).getValueType();
15390
2.12k
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
15391
2.12k
  unsigned NumConcats = NumElts / NumElemsPerConcat;
15392
2.12k
15393
2.12k
  // Special case: shuffle(concat(A,B)) can be more efficiently represented
15394
2.12k
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
15395
2.12k
  // half vector elements.
15396
2.12k
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
15397
588
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
15398
2.12k
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
15399
42
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
15400
42
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
15401
42
    N1 = DAG.getUNDEF(ConcatVT);
15402
42
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
15403
42
  }
15404
2.08k
15405
2.08k
  // Look at every vector that's inserted. We're looking for exact
15406
2.08k
  // subvector-sized copies from a concatenated vector
15407
3.18k
  for (unsigned I = 0; I != NumConcats; ++I) {
15408
3.14k
    // Make sure we're dealing with a copy.
15409
3.14k
    unsigned Begin = I * NumElemsPerConcat;
15410
3.14k
    bool AllUndef = true, NoUndef = true;
15411
28.3k
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
15412
25.1k
      if (SVN->getMaskElt(J) >= 0)
15413
20.3k
        AllUndef = false;
15414
25.1k
      else
15415
4.79k
        NoUndef = false;
15416
25.1k
    }
15417
3.14k
15418
3.14k
    if (NoUndef) {
15419
2.09k
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
15420
188
        return SDValue();
15421
1.90k
15422
10.2k
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
15423
9.21k
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
15424
862
          return SDValue();
15425
1.90k
15426
1.04k
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
15427
1.04k
      if (FirstElt < N0.getNumOperands())
15428
1.02k
        Ops.push_back(N0.getOperand(FirstElt));
15429
1.04k
      else
15430
20
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
15431
2.09k
15432
3.14k
    } else if (AllUndef) {
15433
56
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
15434
1.05k
    } else { // Mixed with general masks and undefs, can't do optimization.
15435
995
      return SDValue();
15436
995
    }
15437
3.14k
  }
15438
2.08k
15439
36
  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15440
2.12k
}
15441
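The per-chunk test above accepts a chunk of the mask only when it is an exact, in-order copy of one concatenated operand: it must start at a multiple of the chunk size and increase by one. A small plain C++ sketch of that test (hypothetical name, not LLVM API):

#include <vector>

bool IsSubvectorCopy(const std::vector<int> &Mask, unsigned Begin,
                     unsigned NumElemsPerConcat) {
  // First element of the chunk must be defined and aligned to a subvector.
  if (Mask[Begin] < 0 || Mask[Begin] % (int)NumElemsPerConcat != 0)
    return false;
  // Remaining elements must be consecutive.
  for (unsigned J = 1; J != NumElemsPerConcat; ++J)
    if (Mask[Begin + J] != Mask[Begin + J - 1] + 1)
      return false;
  return true;
}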
15442
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15443
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15444
//
15445
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15446
// a simplification in some sense, but it isn't appropriate in general: some
15447
// BUILD_VECTORs are substantially cheaper than others. The general case
15448
// of a BUILD_VECTOR requires inserting each element individually (or
15449
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15450
// all constants is a single constant pool load.  A BUILD_VECTOR where each
15451
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
15452
// are undef lowers to a small number of element insertions.
15453
//
15454
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15455
// We don't fold shuffles where one side is a non-zero constant, and we don't
15456
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
15457
// non-constant operands. This seems to work out reasonably well in practice.
15458
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
15459
                                       SelectionDAG &DAG,
15460
32.5k
                                       const TargetLowering &TLI) {
15461
32.5k
  EVT VT = SVN->getValueType(0);
15462
32.5k
  unsigned NumElts = VT.getVectorNumElements();
15463
32.5k
  SDValue N0 = SVN->getOperand(0);
15464
32.5k
  SDValue N1 = SVN->getOperand(1);
15465
32.5k
15466
32.5k
  if (!N0->hasOneUse() || !N1->hasOneUse())
15467
14.3k
    return SDValue();
15468
18.1k
  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
15469
18.1k
  // discussed above.
15470
18.1k
  if (!N1.isUndef()) {
15471
9.30k
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
15472
9.30k
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
15473
9.30k
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
15474
64
      return SDValue();
15475
9.24k
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
15476
51
      return SDValue();
15477
18.0k
  }
15478
18.0k
15479
18.0k
  SmallVector<SDValue, 8> Ops;
15480
18.0k
  SmallSet<SDValue, 16> DuplicateOps;
15481
23.6k
  for (int M : SVN->getMask()) {
15482
23.6k
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
15483
23.6k
    if (M >= 0) {
15484
20.9k
      int Idx = M < (int)NumElts ? M : M - NumElts;
15485
20.9k
      SDValue &S = (M < (int)NumElts ? N0 : N1);
15486
20.9k
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
15487
3.20k
        Op = S.getOperand(Idx);
15488
20.9k
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15489
152
        if (Idx == 0)
15490
152
          Op = S.getOperand(0);
15491
17.7k
      } else {
15492
17.5k
        // Operand can't be combined - bail out.
15493
17.5k
        return SDValue();
15494
17.5k
      }
15495
6.02k
    }
15496
6.02k
15497
6.02k
    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
15498
6.02k
    // fine, but it's likely to generate low-quality code if the target can't
15499
6.02k
    // reconstruct an appropriate shuffle.
15500
6.02k
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
15501
1.24k
      if (!DuplicateOps.insert(Op).second)
15502
97
        return SDValue();
15503
5.92k
15504
5.92k
    Ops.push_back(Op);
15505
5.92k
  }
15506
18.0k
  // BUILD_VECTOR requires all inputs to be of the same type, find the
15507
18.0k
  // maximum type and extend them all.
15508
354
  EVT SVT = VT.getScalarType();
15509
354
  if (SVT.isInteger())
15510
316
    for (SDValue &Op : Ops)
15511
3.26k
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15512
354
  if (SVT != VT.getScalarType())
15513
6
    for (SDValue &Op : Ops)
15514
64
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
15515
0
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15516
64
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15517
354
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15518
32.5k
}
15519
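At its core the combine above gathers one scalar per mask lane from whichever build_vector the lane points into. A minimal plain C++ model of that gathering (std::nullopt models undef; names are invented for the example and the duplicate/constant heuristics are deliberately omitted):

#include <optional>
#include <vector>

std::vector<std::optional<int>>
GatherScalars(const std::vector<int> &Mask,
              const std::vector<std::optional<int>> &N0,
              const std::vector<std::optional<int>> &N1) {
  int NumElts = static_cast<int>(N0.size());
  std::vector<std::optional<int>> Ops;
  for (int M : Mask) {
    if (M < 0)
      Ops.push_back(std::nullopt);       // undef lane
    else if (M < NumElts)
      Ops.push_back(N0[M]);              // element taken from operand 0
    else
      Ops.push_back(N1[M - NumElts]);    // element taken from operand 1
  }
  return Ops;
}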
15520
// Match shuffles that can be converted to any_vector_extend_in_reg.
15521
// This is often generated during legalization.
15522
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15523
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15524
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15525
                                            SelectionDAG &DAG,
15526
                                            const TargetLowering &TLI,
15527
                                            bool LegalOperations,
15528
47.4k
                                            bool LegalTypes) {
15529
47.4k
  EVT VT = SVN->getValueType(0);
15530
47.4k
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15531
47.4k
15532
47.4k
  // TODO Add support for big-endian when we have a test case.
15533
47.4k
  if (!VT.isInteger() || IsBigEndian)
15534
13.1k
    return SDValue();
15535
34.2k
15536
34.2k
  unsigned NumElts = VT.getVectorNumElements();
15537
34.2k
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
15538
34.2k
  ArrayRef<int> Mask = SVN->getMask();
15539
34.2k
  SDValue N0 = SVN->getOperand(0);
15540
34.2k
15541
34.2k
  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15542
72.4k
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15543
143k
    for (unsigned i = 0; i != NumElts; ++i) {
15544
141k
      if (Mask[i] < 0)
15545
21.3k
        continue;
15546
120k
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15547
49.5k
        continue;
15548
71.0k
      return false;
15549
71.0k
    }
15550
1.39k
    return true;
15551
72.4k
  };
15552
34.2k
15553
34.2k
  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15554
34.2k
  // power-of-2 extensions as they are the most likely.
15555
106k
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15556
72.4k
    if (!isAnyExtend(Scale))
15557
71.0k
      continue;
15558
1.39k
15559
1.39k
    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15560
1.39k
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15561
1.39k
    if (!LegalTypes || TLI.isTypeLegal(OutVT))
15562
1.38k
      if (!LegalOperations ||
15563
1.16k
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15564
215
        return DAG.getBitcast(VT,
15565
215
                            DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15566
72.4k
  }
15567
34.2k
15568
34.0k
  return SDValue();
15569
47.4k
}
15570
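A standalone plain C++ sketch of the mask pattern matched above: a shuffle mask is an any-extend-in-reg by Scale when every lane at a multiple of Scale reads element i/Scale and every other defined lane breaks the pattern (invented name, not LLVM API):

#include <vector>

bool IsAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] < 0)
      continue;                                    // undef lanes are fine
    if ((i % Scale) != 0 || Mask[i] != (int)(i / Scale))
      return false;
  }
  return true;                                     // e.g. <0,-1,1,-1> with Scale=2
}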
15571
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15572
// each source element of a large type into the lowest elements of a smaller
15573
// destination type. This is often generated during legalization.
15574
// If the source node itself was a '*_extend_vector_inreg' node then we should
15575
// then be able to remove it.
15576
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15577
47.2k
                                        SelectionDAG &DAG) {
15578
47.2k
  EVT VT = SVN->getValueType(0);
15579
47.2k
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15580
47.2k
15581
47.2k
  // TODO Add support for big-endian when we have a test case.
15582
47.2k
  if (!VT.isInteger() || IsBigEndian)
15583
13.1k
    return SDValue();
15584
34.0k
15585
34.0k
  SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
15586
34.0k
15587
34.0k
  unsigned Opcode = N0.getOpcode();
15588
34.0k
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15589
34.0k
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15590
34.0k
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15591
33.9k
    return SDValue();
15592
41
15593
41
  SDValue N00 = N0.getOperand(0);
15594
41
  ArrayRef<int> Mask = SVN->getMask();
15595
41
  unsigned NumElts = VT.getVectorNumElements();
15596
41
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
15597
41
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15598
41
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15599
41
15600
41
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15601
0
    return SDValue();
15602
41
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15603
41
15604
41
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15605
41
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15606
41
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15607
21
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15608
93
    for (unsigned i = 0; i != NumElts; ++i) {
15609
76
      if (Mask[i] < 0)
15610
38
        continue;
15611
38
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15612
34
        continue;
15613
4
      return false;
15614
4
    }
15615
17
    return true;
15616
21
  };
15617
41
15618
41
  // At the moment we just handle the case where we've truncated back to the
15619
41
  // same size as before the extension.
15620
41
  // TODO: handle more extension/truncation cases as cases arise.
15621
41
  if (EltSizeInBits != ExtSrcSizeInBits)
15622
20
    return SDValue();
15623
21
15624
21
  // We can remove *extend_vector_inreg only if the truncation happens at
15625
21
  // the same scale as the extension.
15626
21
  if (isTruncate(ExtScale))
15627
17
    return DAG.getBitcast(VT, N00);
15628
4
15629
4
  return SDValue();
15630
4
}
15631
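The truncating-mask test above is the mirror image of the any-extend test: lane i must read element i*Scale (or be undef), which undoes an earlier *_extend_vector_inreg at the same scale. A small plain C++ sketch (invented name, not LLVM API):

#include <vector>

bool IsTruncateMask(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumElts = Mask.size();
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] < 0)
      continue;                                    // undef lanes are fine
    if (i * Scale >= NumElts || Mask[i] != (int)(i * Scale))
      return false;
  }
  return true;                                     // e.g. <0,2,-1,-1> with Scale=2
}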
15632
// Combine shuffles of splat-shuffles of the form:
15633
// shuffle (shuffle V, undef, splat-mask), undef, M
15634
// If splat-mask contains undef elements, we need to be careful about
15635
// introducing undef's in the folded mask which are not the result of composing
15636
// the masks of the shuffles.
15637
static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15638
                                     ShuffleVectorSDNode *Splat,
15639
23
                                     SelectionDAG &DAG) {
15640
23
  ArrayRef<int> SplatMask = Splat->getMask();
15641
23
  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15642
23
15643
23
  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15644
23
  // every undef mask element in the splat-shuffle has a corresponding undef
15645
23
  // element in the user-shuffle's mask or if the composition of mask elements
15646
23
  // would result in undef.
15647
23
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15648
23
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15649
23
  //   In this case it is not legal to simplify to the splat-shuffle because we
15650
23
  //   may be exposing the users of the shuffle an undef element at index 1
15651
23
  //   which was not there before the combine.
15652
23
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15653
23
  //   In this case the composition of masks yields SplatMask, so it's ok to
15654
23
  //   simplify to the splat-shuffle.
15655
23
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15656
23
  //   In this case the composed mask includes all undef elements of SplatMask
15657
23
  //   and in addition sets element zero to undef. It is safe to simplify to
15658
23
  //   the splat-shuffle.
15659
23
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15660
23
                                       ArrayRef<int> SplatMask) {
15661
174
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15662
158
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15663
13
          SplatMask[UserMask[i]] != -1)
15664
7
        return false;
15665
16
    return true;
15666
23
  };
15667
23
  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15668
16
    return SDValue(Splat, 0);
15669
7
15670
7
  // Create a new shuffle with a mask that is composed of the two shuffles'
15671
7
  // masks.
15672
7
  SmallVector<int, 32> NewMask;
15673
7
  for (int Idx : UserMask)
15674
28
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15675
23
15676
23
  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15677
23
                              Splat->getOperand(0), Splat->getOperand(1),
15678
23
                              NewMask);
15679
23
}
15680
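The fallback path above composes the user mask with the splat mask lane by lane. A minimal plain C++ sketch of that composition (invented name, not LLVM API):

#include <vector>

std::vector<int> ComposeWithSplatMask(const std::vector<int> &UserMask,
                                      const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  // For UserMask = {3,-1,2,-1} and SplatMask = {2,-1,2,-1} this yields
  // {-1,-1,2,-1}, matching the third example in the comment above.
  return NewMask;
}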
15681
53.1k
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15682
53.1k
  EVT VT = N->getValueType(0);
15683
53.1k
  unsigned NumElts = VT.getVectorNumElements();
15684
53.1k
15685
53.1k
  SDValue N0 = N->getOperand(0);
15686
53.1k
  SDValue N1 = N->getOperand(1);
15687
53.1k
15688
53.1k
  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15689
53.1k
15690
53.1k
  // Canonicalize shuffle undef, undef -> undef
15691
53.1k
  if (N0.isUndef() && N1.isUndef())
15692
0
    return DAG.getUNDEF(VT);
15693
53.1k
15694
53.1k
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15695
53.1k
15696
53.1k
  // Canonicalize shuffle v, v -> v, undef
15697
53.1k
  if (N0 == N1) {
15698
23
    SmallVector<int, 8> NewMask;
15699
161
    for (unsigned i = 0; i != NumElts; ++i) {
15700
138
      int Idx = SVN->getMaskElt(i);
15701
138
      if (Idx >= (int)NumElts) Idx -= NumElts;
15702
138
      NewMask.push_back(Idx);
15703
138
    }
15704
23
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15705
23
  }
15706
53.1k
15707
53.1k
  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
15708
53.1k
  if (N0.isUndef())
15709
1
    return DAG.getCommutedVectorShuffle(*SVN);
15710
53.1k
15711
53.1k
  // Remove references to rhs if it is undef
15712
53.1k
  if (N1.isUndef()) {
15713
31.5k
    bool Changed = false;
15714
31.5k
    SmallVector<int, 8> NewMask;
15715
301k
    for (unsigned i = 0; i != NumElts; ++i) {
15716
269k
      int Idx = SVN->getMaskElt(i);
15717
269k
      if (Idx >= (int)NumElts) {
15718
7
        Idx = -1;
15719
7
        Changed = true;
15720
7
      }
15721
269k
      NewMask.push_back(Idx);
15722
269k
    }
15723
31.5k
    if (Changed)
15724
4
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
15725
53.1k
  }
15726
53.1k
15727
53.1k
  // Simplify shuffle mask if a referenced element is UNDEF.
15728
53.1k
  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
15729
28
    return V;
15730
53.1k
15731
53.1k
  // A shuffle of a single vector that is a splat can always be folded.
15732
53.1k
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
15733
3.89k
    if (N1->isUndef() && N0Shuf->isSplat())
15734
23
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
15735
53.1k
15736
53.1k
  // If it is a splat, check if the argument vector is another splat or a
15737
53.1k
  // build_vector.
15738
53.1k
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
15739
14.9k
    SDNode *V = N0.getNode();
15740
14.9k
15741
14.9k
    // If this is a bit convert that changes the element type of the vector but
15742
14.9k
    // not the number of vector elements, look through it.  Be careful not to
15743
14.9k
    // look though conversions that change things like v4f32 to v2f64.
15744
14.9k
    if (V->getOpcode() == ISD::BITCAST) {
15745
365
      SDValue ConvInput = V->getOperand(0);
15746
365
      if (ConvInput.getValueType().isVector() &&
15747
315
          ConvInput.getValueType().getVectorNumElements() == NumElts)
15748
50
        V = ConvInput.getNode();
15749
365
    }
15750
14.9k
15751
14.9k
    if (V->getOpcode() == ISD::BUILD_VECTOR) {
15752
5.60k
      assert(V->getNumOperands() == NumElts &&
15753
5.60k
             "BUILD_VECTOR has wrong number of operands");
15754
5.60k
      SDValue Base;
15755
5.60k
      bool AllSame = true;
15756
5.65k
      for (unsigned i = 0; i != NumElts; ++i) {
15757
5.65k
        if (!V->getOperand(i).isUndef()) {
15758
5.60k
          Base = V->getOperand(i);
15759
5.60k
          break;
15760
5.60k
        }
15761
5.65k
      }
15762
5.60k
      // Splat of <u, u, u, u>, return <u, u, u, u>
15763
5.60k
      if (!Base.getNode())
15764
0
        return N0;
15765
11.2k
      for (unsigned i = 0; i != NumElts; ++i) {
15766
11.2k
        if (V->getOperand(i) != Base) {
15767
5.60k
          AllSame = false;
15768
5.60k
          break;
15769
5.60k
        }
15770
11.2k
      }
15771
5.60k
      // Splat of <x, x, x, x>, return <x, x, x, x>
15772
5.60k
      if (AllSame)
15773
5
        return N0;
15774
5.60k
15775
5.60k
      // Canonicalize any other splat as a build_vector.
15776
5.60k
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
15777
5.60k
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
15778
5.60k
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
15779
5.60k
15780
5.60k
      // We may have jumped through bitcasts, so the type of the
15781
5.60k
      // BUILD_VECTOR may not match the type of the shuffle.
15782
5.60k
      if (V->getValueType(0) != VT)
15783
0
        NewBV = DAG.getBitcast(VT, NewBV);
15784
5.60k
      return NewBV;
15785
5.60k
    }
15786
14.9k
  }
15787
47.5k
15788
47.5k
  // There are various patterns used to build up a vector from smaller vectors,
15789
47.5k
  // subvectors, or elements. Scan chains of these and replace unused insertions
15790
47.5k
  // or components with undef.
15791
47.5k
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
15792
83
    return S;
15793
47.4k
15794
47.4k
  // Match shuffles that can be converted to any_vector_extend_in_reg.
15795
47.4k
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
15796
215
    return V;
15797
47.2k
15798
47.2k
  // Combine "truncate_vector_in_reg" style shuffles.
15799
47.2k
  
if (SDValue 47.2k
V47.2k
= combineTruncationShuffle(SVN, DAG))
15800
17
    return V;
15801
47.1k
15802
47.1k
  
if (47.1k
N0.getOpcode() == ISD::CONCAT_VECTORS &&
15803
2.82k
      Level < AfterLegalizeVectorOps &&
15804
2.61k
      (N1.isUndef() ||
15805
1.70k
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
15806
47.1k
       
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()1.21k
))) {
15807
2.12k
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
15808
78
      return V;
15809
47.1k
  }
15810
47.1k
15811
47.1k
  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15812
47.1k
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15813
47.1k
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15814
32.5k
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
15815
354
      return Res;
15816
46.7k
15817
46.7k
  // If this shuffle only has a single input that is a bitcasted shuffle,
15818
46.7k
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
15819
46.7k
  // back to their original types.
15820
46.7k
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15821
46.7k
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
15822
46.7k
      TLI.isTypeLegal(VT)) {
15823
1.88k
15824
1.88k
    // Peek through the bitcast only if there is one user.
15825
1.88k
    SDValue BC0 = N0;
15826
3.76k
    while (BC0.getOpcode() == ISD::BITCAST) {
15827
1.88k
      if (!BC0.hasOneUse())
15828
0
        break;
15829
1.88k
      BC0 = BC0.getOperand(0);
15830
1.88k
    }
15831
1.88k
15832
164
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
15833
164
      if (Scale == 1)
15834
82
        return SmallVector<int, 8>(Mask.begin(), Mask.end());
15835
82
15836
82
      SmallVector<int, 8> NewMask;
15837
82
      for (int M : Mask)
15838
1.29k
        for (int s = 0; s != Scale; ++s)
15839
944
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
15840
164
      return NewMask;
15841
164
    };
15842
1.88k
15843
1.88k
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
15844
82
      EVT SVT = VT.getScalarType();
15845
82
      EVT InnerVT = BC0->getValueType(0);
15846
82
      EVT InnerSVT = InnerVT.getScalarType();
15847
82
15848
82
      // Determine which shuffle works with the smaller scalar type.
15849
82
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
15850
82
      EVT ScaleSVT = ScaleVT.getScalarType();
15851
82
15852
82
      if (TLI.isTypeLegal(ScaleVT) &&
15853
82
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
15854
82
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
15855
82
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15856
82
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
15857
82
15858
82
        // Scale the shuffle masks to the smaller scalar type.
15859
82
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
15860
82
        SmallVector<int, 8> InnerMask =
15861
82
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
15862
82
        SmallVector<int, 8> OuterMask =
15863
82
            ScaleShuffleMask(SVN->getMask(), OuterScale);
15864
82
15865
82
        // Merge the shuffle masks.
15866
82
        SmallVector<int, 8> NewMask;
15867
82
        for (int M : OuterMask)
15868
944
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
15869
82
15870
82
        // Test for shuffle mask legality over both commutations.
15871
82
        SDValue SV0 = BC0->getOperand(0);
15872
82
        SDValue SV1 = BC0->getOperand(1);
15873
82
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15874
82
        if (!LegalMask) {
15875
2
          std::swap(SV0, SV1);
15876
2
          ShuffleVectorSDNode::commuteMask(NewMask);
15877
2
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
15878
2
        }
15879
82
15880
82
        if (LegalMask) {
15881
80
          SV0 = DAG.getBitcast(ScaleVT, SV0);
15882
80
          SV1 = DAG.getBitcast(ScaleVT, SV1);
15883
80
          return DAG.getBitcast(
15884
80
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
15885
80
        }
15886
46.6k
      }
15887
82
    }
15888
1.88k
  }
15889
46.6k
15890
46.6k
  // Canonicalize shuffles according to rules:
15891
46.6k
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
15892
46.6k
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
15893
46.6k
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
15894
46.6k
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
15895
46.6k
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
15896
46.6k
      TLI.isTypeLegal(VT)) {
15897
765
    // The incoming shuffle must be of the same type as the result of the
15898
765
    // current shuffle.
15899
765
    assert(N1->getOperand(0).getValueType() == VT &&
15900
765
           "Shuffle types don't match");
15901
765
15902
765
    SDValue SV0 = N1->getOperand(0);
15903
765
    SDValue SV1 = N1->getOperand(1);
15904
765
    bool HasSameOp0 = N0 == SV0;
15905
765
    bool IsSV1Undef = SV1.isUndef();
15906
765
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
15907
765
      // Commute the operands of this shuffle so that next rule
15908
765
      // will trigger.
15909
602
      return DAG.getCommutedVectorShuffle(*SVN);
15910
46.0k
  }
15911
46.0k
15912
46.0k
  // Try to fold according to rules:
15913
46.0k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
15914
46.0k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
15915
46.0k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
15916
46.0k
  // Don't try to fold shuffles with illegal type.
15917
46.0k
  // Only fold if this shuffle is the only user of the other shuffle.
15918
46.0k
  
if (46.0k
N0.getOpcode() == ISD::VECTOR_SHUFFLE && 46.0k
N->isOnlyUserOf(N0.getNode())3.86k
&&
15919
46.0k
      
Level < AfterLegalizeDAG3.07k
&&
TLI.isTypeLegal(VT)3.02k
) {
15920
2.94k
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
15921
2.94k
15922
2.94k
    // Don't try to fold splats; they're likely to simplify somehow, or they
15923
2.94k
    // might be free.
15924
2.94k
    if (OtherSV->isSplat())
15925
45
      return SDValue();
15926
2.90k
15927
2.90k
    // The incoming shuffle must be of the same type as the result of the
15928
2.90k
    // current shuffle.
15929
2.94k
    assert(OtherSV->getOperand(0).getValueType() == VT &&
15930
2.90k
           "Shuffle types don't match");
15931
2.90k
15932
2.90k
    SDValue SV0, SV1;
15933
2.90k
    SmallVector<int, 4> Mask;
15934
2.90k
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
15935
2.90k
    // operand, and SV1 as the second operand.
15936
32.4k
    for (unsigned i = 0; i != NumElts; ++i) {
15937
30.1k
      int Idx = SVN->getMaskElt(i);
15938
30.1k
      if (Idx < 0) {
15939
4.06k
        // Propagate Undef.
15940
4.06k
        Mask.push_back(Idx);
15941
4.06k
        continue;
15942
4.06k
      }
15943
26.0k
15944
26.0k
      SDValue CurrentVec;
15945
26.0k
      if (Idx < (int)NumElts) {
15946
12.0k
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
15947
12.0k
        // shuffle mask to identify which vector is actually referenced.
15948
12.0k
        Idx = OtherSV->getMaskElt(Idx);
15949
12.0k
        if (Idx < 0) {
15950
183
          // Propagate Undef.
15951
183
          Mask.push_back(Idx);
15952
183
          continue;
15953
183
        }
15954
11.8k
15955
11.8k
        
CurrentVec = (Idx < (int) NumElts) ? 11.8k
OtherSV->getOperand(0)9.74k
15956
2.08k
                                           : OtherSV->getOperand(1);
15957
26.0k
      } else {
15958
14.0k
        // This shuffle index references an element within N1.
15959
14.0k
        CurrentVec = N1;
15960
14.0k
      }
15961
26.0k
15962
26.0k
      // Simple case where 'CurrentVec' is UNDEF.
15963
25.8k
      
if (25.8k
CurrentVec.isUndef()25.8k
) {
15964
0
        Mask.push_back(-1);
15965
0
        continue;
15966
0
      }
15967
25.8k
15968
25.8k
      // Canonicalize the shuffle index. We don't know yet if CurrentVec
15969
25.8k
      // will be the first or second operand of the combined shuffle.
15970
25.8k
      Idx = Idx % NumElts;
15971
25.8k
      if (!SV0.getNode() || SV0 == CurrentVec) {
15972
10.5k
        // Ok. CurrentVec is the left hand side.
15973
10.5k
        // Update the mask accordingly.
15974
10.5k
        SV0 = CurrentVec;
15975
10.5k
        Mask.push_back(Idx);
15976
10.5k
        continue;
15977
10.5k
      }
15978
15.3k
15979
15.3k
      // Bail out if we cannot convert the shuffle pair into a single shuffle.
15980
15.3k
      
if (15.3k
SV1.getNode() && 15.3k
SV1 != CurrentVec13.1k
)
15981
593
        return SDValue();
15982
14.7k
15983
14.7k
      // Ok. CurrentVec is the right hand side.
15984
14.7k
      // Update the mask accordingly.
15985
14.7k
      SV1 = CurrentVec;
15986
14.7k
      Mask.push_back(Idx + NumElts);
15987
14.7k
    }
15988
2.90k
15989
2.90k
    // Check if all indices in Mask are Undef. In case, propagate Undef.
15990
2.31k
    bool isUndefMask = true;
15991
4.73k
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
15992
2.42k
      isUndefMask &= Mask[i] < 0;
15993
2.31k
15994
2.31k
    if (isUndefMask)
15995
6
      return DAG.getUNDEF(VT);
15996
2.30k
15997
2.30k
    if (!SV0.getNode())
15998
0
      SV0 = DAG.getUNDEF(VT);
15999
2.30k
    if (!SV1.getNode())
16000
724
      SV1 = DAG.getUNDEF(VT);
16001
2.30k
16002
2.30k
    // Avoid introducing shuffles with illegal mask.
16003
2.30k
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
16004
0
      ShuffleVectorSDNode::commuteMask(Mask);
16005
0
16006
0
      if (!TLI.isShuffleMaskLegal(Mask, VT))
16007
0
        return SDValue();
16008
0
16009
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
16010
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
16011
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
16012
0
      std::swap(SV0, SV1);
16013
0
    }
16014
2.30k
16015
2.30k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16016
2.30k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16017
2.30k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16018
2.30k
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
16019
43.1k
  }
16020
43.1k
16021
43.1k
  return SDValue();
16022
43.1k
}
16023
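The two-level fold at the end of the visitor above merges the masks of nested shuffles so a single shuffle reads directly from the surviving operands. The following standalone plain C++ check (no LLVM API; helper names are invented, and the real combine additionally tracks which two of A, B, C survive as the new operands) demonstrates the underlying equivalence on concrete values:

#include <vector>

using Vec = std::vector<int>;

static int Pick(const Vec &L, const Vec &R, int Idx) {
  if (Idx < 0) return -1;                       // undef lane
  int N = static_cast<int>(L.size());
  return Idx < N ? L[Idx] : R[Idx - N];
}

static Vec Shuffle(const Vec &L, const Vec &R, const Vec &Mask) {
  Vec Out;
  for (int Idx : Mask) Out.push_back(Pick(L, R, Idx));
  return Out;
}

int main() {
  Vec A{10, 11}, B{20, 21}, C{30, 31};
  Vec M0{1, 2};                                 // inner: {A[1], B[0]}
  Vec M1{3, 0};                                 // outer: {C[1], inner[0]}
  Vec Nested = Shuffle(Shuffle(A, B, M0), C, M1);
  // Nested == {31, 11}; a single shuffle of (A, C) with mask {3, 1} produces
  // the same elements, which is the kind of rewrite performed above.
  Vec Single = Shuffle(A, C, Vec{3, 1});
  return Nested == Single ? 0 : 1;
}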
16024
35.7k
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
16025
35.7k
  SDValue InVal = N->getOperand(0);
16026
35.7k
  EVT VT = N->getValueType(0);
16027
35.7k
16028
35.7k
  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
16029
35.7k
  // with a VECTOR_SHUFFLE and possible truncate.
16030
35.7k
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16031
15.5k
    SDValue InVec = InVal->getOperand(0);
16032
15.5k
    SDValue EltNo = InVal->getOperand(1);
16033
15.5k
    auto InVecT = InVec.getValueType();
16034
15.5k
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
16035
15.5k
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
16036
15.5k
      int Elt = C0->getZExtValue();
16037
15.5k
      NewMask[0] = Elt;
16038
15.5k
      SDValue Val;
16039
15.5k
      // If we have an implicit truncate, do the truncate here as long as it's legal.
16040
15.5k
      // If it's not legal, fall through without combining.
16041
15.5k
      if (VT.getScalarType() != InVal.getValueType() &&
16042
30
          InVal.getValueType().isScalarInteger() &&
16043
15.5k
          
isTypeLegal(VT.getScalarType())30
) {
16044
0
        Val =
16045
0
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
16046
0
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
16047
0
      }
16048
15.5k
      
if (15.5k
VT.getScalarType() == InVecT.getScalarType() &&
16049
383
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
16050
15.5k
          
TLI.isShuffleMaskLegal(NewMask, VT)383
) {
16051
383
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
16052
383
                                   DAG.getUNDEF(InVecT), NewMask);
16053
383
        // If the initial vector is the correct size this shuffle is a
16054
383
        // valid result.
16055
383
        if (VT == InVecT)
16056
248
          return Val;
16057
135
        // If not we must truncate the vector.
16058
135
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
16059
135
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16060
135
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
16061
135
          EVT SubVT =
16062
135
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
16063
135
                               VT.getVectorNumElements());
16064
135
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
16065
135
                            ZeroIdx);
16066
135
          return Val;
16067
135
        }
16068
35.3k
      }
16069
15.5k
    }
16070
15.5k
  }
16071
35.3k
16072
35.3k
  return SDValue();
16073
35.3k
}
16074
16075
43.1k
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
16076
43.1k
  EVT VT = N->getValueType(0);
16077
43.1k
  SDValue N0 = N->getOperand(0);
16078
43.1k
  SDValue N1 = N->getOperand(1);
16079
43.1k
  SDValue N2 = N->getOperand(2);
16080
43.1k
16081
43.1k
  // If inserting an UNDEF, just return the original vector.
16082
43.1k
  if (N1.isUndef())
16083
28
    return N0;
16084
43.0k
16085
43.0k
  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
16086
43.0k
  // us to pull BITCASTs from input to output.
16087
43.0k
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
16088
4.32k
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
16089
373
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
16090
42.7k
16091
42.7k
  // If this is an insert of an extracted vector into an undef vector, we can
16092
42.7k
  // just use the input to the extract.
16093
42.7k
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16094
42.7k
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
16095
376
    return N1.getOperand(0);
16096
42.3k
16097
42.3k
  // If we are inserting a bitcast value into an undef, with the same
16098
42.3k
  // number of elements, just use the bitcast input of the extract.
16099
42.3k
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
16100
42.3k
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
16101
42.3k
  
if (42.3k
N0.isUndef() && 42.3k
N1.getOpcode() == ISD::BITCAST36.3k
&&
16102
4.03k
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16103
145
      N1.getOperand(0).getOperand(1) == N2 &&
16104
142
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
16105
42.3k
          VT.getVectorNumElements()) {
16106
3
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
16107
3
  }
16108
42.3k
16109
42.3k
  // If both N1 and N2 are bitcast values on which insert_subvector
16110
42.3k
  // would makes sense, pull the bitcast through.
16111
42.3k
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
16112
42.3k
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
16113
42.3k
  
if (42.3k
N0.getOpcode() == ISD::BITCAST && 42.3k
N1.getOpcode() == ISD::BITCAST482
) {
16114
148
    SDValue CN0 = N0.getOperand(0);
16115
148
    SDValue CN1 = N1.getOperand(0);
16116
148
    if (CN0.getValueType().getVectorElementType() ==
16117
148
            CN1.getValueType().getVectorElementType() &&
16118
89
        CN0.getValueType().getVectorNumElements() ==
16119
148
            VT.getVectorNumElements()) {
16120
3
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
16121
3
                                      CN0.getValueType(), CN0, CN1, N2);
16122
3
      return DAG.getBitcast(VT, NewINSERT);
16123
3
    }
16124
42.3k
  }
16125
42.3k
16126
42.3k
  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
16127
42.3k
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
16128
42.3k
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
16129
42.3k
  
if (42.3k
N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
16130
4.10k
      N0.getOperand(1).getValueType() == N1.getValueType() &&
16131
4.10k
      N0.getOperand(2) == N2)
16132
42
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
16133
42
                       N1, N2);
16134
42.2k
16135
42.2k
  
if (42.2k
!isa<ConstantSDNode>(N2)42.2k
)
16136
0
    return SDValue();
16137
42.2k
16138
42.2k
  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
16139
42.2k
16140
42.2k
  // Canonicalize insert_subvector dag nodes.
16141
42.2k
  // Example:
16142
42.2k
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
16143
42.2k
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
16144
42.2k
  if (
N0.getOpcode() == ISD::INSERT_SUBVECTOR && 42.2k
N0.hasOneUse()4.06k
&&
16145
3.94k
      N1.getValueType() == N0.getOperand(1).getValueType() &&
16146
42.2k
      
isa<ConstantSDNode>(N0.getOperand(2))3.94k
) {
16147
3.94k
    unsigned OtherIdx = N0.getConstantOperandVal(2);
16148
3.94k
    if (
InsIdx < OtherIdx3.94k
) {
16149
4
      // Swap nodes.
16150
4
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
16151
4
                                  N0.getOperand(0), N1, N2);
16152
4
      AddToWorklist(NewOp.getNode());
16153
4
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
16154
4
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
16155
4
    }
16156
42.2k
  }
16157
42.2k
16158
42.2k
  // If the input vector is a concatenation, and the insert replaces
16159
42.2k
  // one of the pieces, we can optimize into a single concat_vectors.
16160
42.2k
  
if (42.2k
N0.getOpcode() == ISD::CONCAT_VECTORS && 42.2k
N0.hasOneUse()0
&&
16161
42.2k
      
N0.getOperand(0).getValueType() == N1.getValueType()0
) {
16162
0
    unsigned Factor = N1.getValueType().getVectorNumElements();
16163
0
16164
0
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
16165
0
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
16166
0
16167
0
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16168
0
  }
16169
42.2k
16170
42.2k
  return SDValue();
16171
42.2k
}
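
A minimal scalar sketch of the final concat_vectors fold above, assuming the insert index lands exactly on a concat piece boundary (the names and values are illustrative only, not LLVM APIs):

#include <array>
#include <cassert>

int main() {
  std::array<int, 8> Vec = {0, 1, 2, 3, 4, 5, 6, 7}; // CONCAT of two 4-wide pieces
  std::array<int, 4> Sub = {40, 41, 42, 43};         // subvector being inserted
  unsigned InsIdx = 4, Factor = Sub.size();
  // INSERT_SUBVECTOR(CONCAT(A, B), Sub, 4) == CONCAT(A, Sub):
  // overwrite piece number InsIdx / Factor.
  for (unsigned i = 0; i != Factor; ++i)
    Vec[(InsIdx / Factor) * Factor + i] = Sub[i];
  assert(Vec[0] == 0 && Vec[4] == 40 && Vec[7] == 43);
  return 0;
}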

SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
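
A standalone sketch of the clear-mask construction above: all-ones lanes of the AND mask keep the original lane index, all-zero lanes select a lane of the zero vector (here index i + NumElts, matching the code; the doc comment's <0, 4, 2, 4> is an equivalent choice since every lane of the zero vector is zero). Illustrative only, not the DAG implementation:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<uint32_t> Mask = {0xffffffffu, 0u, 0xffffffffu, 0u};
  const int NumElts = Mask.size();
  std::vector<int> Indices;
  for (int i = 0; i != NumElts; ++i) {
    if (Mask[i] == 0xffffffffu)
      Indices.push_back(i);            // keep element i of V
    else if (Mask[i] == 0u)
      Indices.push_back(i + NumElts);  // take an element of the zero vector
    else
      return 0;                        // not a clear mask; the combine bails
  }
  for (int Idx : Indices)
    std::printf("%d ", Idx);           // prints: 0 5 2 7
  std::printf("\n");
  return 0;
}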

/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).isUndef() &&
      RHS.getOperand(1).isUndef()) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  SVN0->getMask());
    }
  }

  return SDValue();
}

SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}

/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
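
A tiny scalar analogue of the select-of-loads fold above: selecting between two loaded values with a shared chain is the same as loading through a select of the two addresses. This ignores the volatility, indexing, and address-space checks the real code performs; names are illustrative only:

#include <cassert>

// load (select C, P, Q) instead of select C, (load P), (load Q)
int loadOfSelect(bool C, const int *P, const int *Q) {
  return *(C ? P : Q);
}

int main() {
  int A = 10, B = 123;
  assert(loadOfSelect(true, &A, &B) == A);
  assert(loadOfSelect(false, &A, &B) == B);
  return 0;
}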

/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// bitwise 'and'.
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
                                            SDValue N1, SDValue N2, SDValue N3,
                                            ISD::CondCode CC) {
  // If this is a select where the false operand is zero and the compare is a
  // check of the sign bit, see if we can perform the "gzip trick":
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
  EVT XType = N0.getValueType();
  EVT AType = N2.getValueType();
  if (!isNullConstant(N3) || !XType.bitsGE(AType))
    return SDValue();

  // If the comparison is testing for a positive value, we have to invert
  // the sign bit mask, so only do that transform if the target has a bitwise
  // 'and not' instruction (the invert is free).
  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
    // (X > -1) ? A : 0
    // (X >  0) ? X : 0 <-- This is canonical signed max.
    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
      return SDValue();
  } else if (CC == ISD::SETLT) {
    // (X <  0) ? A : 0
    // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
      return SDValue();
  } else {
    return SDValue();
  }

  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
  // constant.
  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
    AddToWorklist(Shift.getNode());

    if (XType.bitsGT(AType)) {
      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
      AddToWorklist(Shift.getNode());
    }

    if (CC == ISD::SETGT)
      Shift = DAG.getNOT(DL, Shift, AType);

    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
  }

  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
  AddToWorklist(Shift.getNode());

  if (XType.bitsGT(AType)) {
    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
    AddToWorklist(Shift.getNode());
  }

  if (CC == ISD::SETGT)
    Shift = DAG.getNOT(DL, Shift, AType);

  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
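
A standalone check of the "gzip trick" above for 32-bit integers, assuming arithmetic right shift of signed values (which is what the emitted SRA guarantees; plain C++ only promises it in practice). Illustrative sketch only:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {-5, -1, 0, 1, 7}) {
    for (int32_t A : {1, 4, 123}) {
      int32_t Sign = X >> 31;                  // all-ones if X < 0, else 0
      assert((X < 0 ? A : 0) == (Sign & A));   // select_cc setlt X, 0, A, 0
      assert((X > -1 ? A : 0) == (~Sign & A)); // select_cc setgt X, -1, A, 0
    }
  }
  return 0;
}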

/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
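
A standalone check of the integer-abs pattern recognized above, assuming two's complement, arithmetic right shift, and X != INT32_MIN. Illustrative sketch only:

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  for (int32_t X : {-100, -1, 0, 1, 42}) {
    int32_t Y = X >> 31;                 // Y = sra(X, size(X)-1)
    assert(((X + Y) ^ Y) == std::abs(X)); // xor(add(X, Y), Y) == |X|
  }
  return 0;
}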

/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // when optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction()->optForMinSize())
    return SDValue();

  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S =
      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}
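
A standalone sketch of the add-and-shift expansion this produces for a signed divide by a power of two (here 4): bias negative values so the arithmetic shift rounds toward zero the way sdiv does. Assumes arithmetic right shift of signed values; names are illustrative only:

#include <cassert>
#include <cstdint>

int32_t sdiv4(int32_t X) {
  int32_t Sign = X >> 31;              // all-ones for negative X, else 0
  int32_t Bias = (uint32_t)Sign >> 30; // adds 3 (= 4 - 1) only when X < 0
  return (X + Bias) >> 2;
}

int main() {
  for (int32_t X : {-9, -8, -7, -1, 0, 1, 7, 8, 9})
    assert(sdiv4(X) == X / 4);
  return 0;
}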

/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // when optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction()->optForMinSize())
    return SDValue();

  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S =
      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}
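
A standalone sketch of the magic-number expansion for an unsigned divide by a constant (here 3): multiply by a precomputed fixed-point reciprocal and shift, in the style of "Hacker's Delight". 0xAAAAAAAB = ceil(2^33 / 3) is the well-known 32-bit magic for dividing by 3; illustrative only:

#include <cassert>
#include <cstdint>

uint32_t udiv3(uint32_t N) {
  // N / 3 == (N * ceil(2^33 / 3)) >> 33 for all 32-bit N.
  return (uint32_t)(((uint64_t)0xAAAAAAABu * N) >> 33);
}

int main() {
  for (uint32_t N : {0u, 1u, 2u, 3u, 4u, 9u, 10u, 299u, 4294967295u})
    assert(udiv3(N) == N / 3);
  return 0;
}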

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}
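
A standalone sketch of that transform for 32-bit power-of-two values, LogBase2(V) = 31 - ctlz(V), using the GCC/Clang builtin as a stand-in for the CTLZ node. Illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Shift = 0; Shift != 32; ++Shift) {
    uint32_t V = 1u << Shift;
    assert(31u - __builtin_clz(V) == Shift); // (EltBits - 1) - ctlz(V)
  }
  return 0;
}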

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      EVT VT = Op.getValueType();
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}
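
A standalone sketch of the Newton refinement emitted above for 1/A, Est = Est + Est * (1 - A * Est), starting from a rough seed; each step roughly doubles the number of correct bits. The starting value stands in for the hardware estimate and is illustrative only:

#include <cassert>
#include <cmath>

int main() {
  float A = 3.0f;
  float Est = 0.3f;                       // stand-in for the hardware estimate
  for (int i = 0; i != 3; ++i)
    Est = Est + Est * (1.0f - A * Est);   // Newton step for F(X) = A*X - 1
  assert(std::fabs(Est - 1.0f / A) < 1e-6f);
  return 0;
}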

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
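
A standalone sketch of the two-constant Newton step emitted above for 1/sqrt(A), Est = (Est * -0.5) * (A * Est * Est + (-3.0)), which is algebraically Est * (1.5 - 0.5 * A * Est * Est). The seed stands in for the hardware estimate; illustrative only:

#include <cassert>
#include <cmath>

int main() {
  float A = 2.0f;
  float Est = 0.7f;                       // stand-in for the hardware estimate
  for (int i = 0; i != 3; ++i)
    Est = (Est * -0.5f) * (A * Est * Est + -3.0f);
  assert(std::fabs(Est - 1.0f / std::sqrt(A)) < 1e-5f);
  return 0;
}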

/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // Unfortunately, Est is now NaN if the input was exactly 0.0.
        // Select out this case and force the answer to 0.0.
        EVT VT = Op.getValueType();
        SDLoc DL(Op);

        SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
        EVT CCVT = getSetCCResultType(VT);
        SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
        AddToWorklist(ZeroCmp.getNode());

        Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
                          ZeroCmp, FPZero, Est);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}

SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}

/// Return true if base is a frame index, which is known not to alias with
/// anything but itself.  Provides base object and offset as results.
static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, const void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;

  // If it's adding a simple constant, then integrate the offset.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getSExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets.
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
                                         : (const void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }
  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}
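
A standalone sketch of the interval-overlap test used by isAlias below once two accesses have been reduced to (base, offset, size): accesses with a common base may alias exactly when their byte ranges intersect. Names are illustrative only:

#include <cassert>
#include <cstdint>

bool mayOverlap(int64_t Offset0, unsigned NumBytes0,
                int64_t Offset1, unsigned NumBytes1) {
  // Mirrors !((Offset0 + NumBytes0) <= Offset1 ||
  //           (Offset1 + NumBytes1) <= Offset0) in the combiner.
  return !((Offset0 + NumBytes0 <= Offset1) ||
           (Offset1 + NumBytes1 <= Offset0));
}

int main() {
  assert(!mayOverlap(0, 4, 4, 4)); // [0,4) and [4,8) are disjoint
  assert(mayOverlap(0, 8, 4, 4));  // [0,8) covers [4,8)
  assert(mayOverlap(2, 4, 0, 4));  // [2,6) and [0,4) intersect
  return 0;
}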
17164
17165            /// Return true if there is any possibility that the two addresses overlap.
17166   35.4M  bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
17167   35.4M    // If they are the same then they must be aliases.
17168   35.4M    if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
17169   34.8M
17170   34.8M    // If they are both volatile then they cannot be reordered.
17171   34.8M    if (Op0->isVolatile() && Op1->isVolatile()) return true;
17172   34.7M
17173   34.7M    // If one operation reads from invariant memory, and the other may store, they
17174   34.7M    // cannot alias. These should really be checking the equivalent of mayWrite,
17175   34.7M    // but it only matters for memory nodes other than load /store.
17176   34.7M    if (Op0->isInvariant() && Op1->writeMem())
17177     121      return false;
17178   34.7M
17179   34.7M    if (Op1->isInvariant() && Op0->writeMem())
17180   24.4k      return false;
17181   34.7M
17182   34.7M    unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
17183   34.7M    unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
17184   34.7M
17185   34.7M    // Check for BaseIndexOffset matching.
17186   34.7M    BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
17187   34.7M    BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
17188   34.7M    int64_t PtrDiff;
17189   34.7M    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
17190   28.5M      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
17191   6.21M
17192   6.21M    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
17193   6.21M    // able to calculate their relative offset if at least one arises
17194   6.21M    // from an alloca. However, these allocas cannot overlap and we
17195   6.21M    // can infer there is no alias.
17196   6.21M    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
17197   1.04M      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
17198   72.5k        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17199   72.5k        // If the base are the same frame index but the we couldn't find a
17200   72.5k        // constant offset, (indices are different) be conservative.
17201   72.5k        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
17202     328                       !MFI.isFixedObjectIndex(B->getIndex())))
17203   68.0k          return false;
17204   6.15M      }
17205   6.15M
17206   6.15M    // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
17207   6.15M    // modified to use BaseIndexOffset.
17208   6.15M
17209   6.15M    // Gather base node and offset information.
17210   6.15M    SDValue Base0, Base1;
17211   6.15M    int64_t Offset0, Offset1;
17212   6.15M    const GlobalValue *GV0, *GV1;
17213   6.15M    const void *CV0, *CV1;
17214   6.15M    bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
17215   6.15M                                        Base0, Offset0, GV0, CV0);
17216   6.15M    bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
17217   6.15M                                        Base1, Offset1, GV1, CV1);
17218   6.15M
17219   6.15M    // If they have the same base address, then check to see if they overlap.
17220   6.15M    if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
17221       3      return !((Offset0 + NumBytes0) <= Offset1 ||
17222       3               (Offset1 + NumBytes1) <= Offset0);
17223   6.15M
17224   6.15M    // It is possible for different frame indices to alias each other, mostly
17225   6.15M    // when tail call optimization reuses return address slots for arguments.
17226   6.15M    // To catch this case, look up the actual index of frame indices to compute
17227   6.15M    // the real alias relationship.
17228   6.15M    if (IsFrameIndex0 && IsFrameIndex1) {
17229       0      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17230       0      Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
17231       0      Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
17232       0      return !((Offset0 + NumBytes0) <= Offset1 ||
17233       0               (Offset1 + NumBytes1) <= Offset0);
17234       0    }
17235   6.15M
17236   6.15M    // Otherwise, if we know what the bases are, and they aren't identical, then
17237   6.15M    // we know they cannot alias.
17238   6.15M    if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
17239    256k      return false;
17240   5.89M
17241   5.89M    // If we know required SrcValue1 and SrcValue2 have relatively large alignment
17242   5.89M    // compared to the size and offset of the access, we may be able to prove they
17243   5.89M    // do not alias. This check is conservative for now to catch cases created by
17244   5.89M    // splitting vector types.
17245   5.89M    int64_t SrcValOffset0 = Op0->getSrcValueOffset();
17246   5.89M    int64_t SrcValOffset1 = Op1->getSrcValueOffset();
17247   5.89M    unsigned OrigAlignment0 = Op0->getOriginalAlignment();
17248   5.89M    unsigned OrigAlignment1 = Op1->getOriginalAlignment();
17249   5.89M    if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
17250   5.89M        NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
17251   19.4k      int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
17252   19.4k      int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
17253   19.4k
17254   19.4k      // There is no overlap between these relatively aligned accesses of similar
17255   19.4k      // size. Return no alias.
17256   19.4k      if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
17257   17.3k          (OffAlign1 + NumBytes1) <= OffAlign0)
17258   4.87k        return false;
17259   5.89M    }
17260   5.89M
17261   5.89M    bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
17262       0                     ? CombinerGlobalAA
17263   5.89M                     : DAG.getSubtarget().useAA();
17264           #ifndef NDEBUG
17265             if (CombinerAAOnlyFunc.getNumOccurrences() &&
17266                 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
17267               UseAA = false;
17268           #endif
17269
17270   5.89M    if (UseAA && AA &&
17271   5.89M        Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
17272   3.55k      // Use alias analysis information.
17273   3.55k      int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
17274   3.55k      int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
17275   3.55k      int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
17276   3.55k      AliasResult AAResult =
17277   3.55k          AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
17278   3.55k                                   UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
17279   3.55k                    MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
17280   3.55k                                   UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
17281   3.55k      if (AAResult == NoAlias)
17282   1.36k        return false;
17283   5.88M    }
17284   5.88M
17285   5.88M    // Otherwise we have to assume they alias.
17286   5.88M    return true;
17287   5.88M  }
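Most of isAlias comes down to the same interval test applied in different coordinate systems: two accesses of NumBytes0 and NumBytes1 bytes at Offset0 and Offset1 from a common base overlap unless one ends at or before the other begins, and the alignment block reuses that test on offsets reduced modulo a shared alignment. The standalone sketch below illustrates only that arithmetic; rangesOverlap and provablyDisjointByAlignment are hypothetical names, not LLVM APIs.

#include <cassert>
#include <cstdint>

// Sketch of the interval test isAlias uses once two accesses are expressed
// as (offset, size) against a common base: they overlap unless one access
// ends at or before the other begins.
static bool rangesOverlap(int64_t Off0, int64_t Size0,
                          int64_t Off1, int64_t Size1) {
  return !((Off0 + Size0) <= Off1 || (Off1 + Size1) <= Off0);
}

// Sketch mirroring the shape of the alignment-based check: when both
// accesses have the same size and the same alignment that exceeds that
// size, comparing offsets modulo the alignment can prove disjointness.
static bool provablyDisjointByAlignment(int64_t Off0, int64_t Off1,
                                        int64_t NumBytes, int64_t Align) {
  assert(Align > NumBytes && "only meaningful when alignment exceeds size");
  int64_t OffAlign0 = Off0 % Align;
  int64_t OffAlign1 = Off1 % Align;
  return (OffAlign0 + NumBytes) <= OffAlign1 ||
         (OffAlign1 + NumBytes) <= OffAlign0;
}

int main() {
  assert(rangesOverlap(0, 4, 2, 4));    // [0,4) and [2,6) overlap
  assert(!rangesOverlap(0, 4, 4, 4));   // [0,4) and [4,8) do not
  assert(provablyDisjointByAlignment(0, 8, 4, 16));
  return 0;
}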
17288
17289            /// Walk up chain skipping non-aliasing memory nodes,
17290            /// looking for aliasing nodes and adding them to the Aliases vector.
17291            void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
17292   17.9M                                       SmallVectorImpl<SDValue> &Aliases) {
17293   17.9M    SmallVector<SDValue, 8> Chains;     // List of chains to visit.
17294   17.9M    SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
17295   17.9M
17296   17.9M    // Get alias information for node.
17297   6.02M    bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
17298   17.9M
17299   17.9M    // Starting off.
17300   17.9M    Chains.push_back(OriginalChain);
17301   17.9M    unsigned Depth = 0;
17302   17.9M
17303   17.9M    // Look at each chain and determine if it is an alias.  If so, add it to the
17304   17.9M    // aliases list.  If not, then continue up the chain looking for the next
17305   17.9M    // candidate.
17306   73.3M    while (!Chains.empty()) {
17307   56.8M      SDValue Chain = Chains.pop_back_val();
17308   56.8M
17309   56.8M      // For TokenFactor nodes, look at each operand and only continue up the
17310   56.8M      // chain until we reach the depth limit.
17311   56.8M      //
17312   56.8M      // FIXME: The depth check could be made to return the last non-aliasing
17313   56.8M      // chain we found before we hit a tokenfactor rather than the original
17314   56.8M      // chain.
17315   56.8M      if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
17316   1.45M        Aliases.clear();
17317   1.45M        Aliases.push_back(OriginalChain);
17318   1.45M        return;
17319   1.45M      }
17320   55.4M
17321   55.4M      // Don't bother if we've been before.
17322   55.4M      if (!Visited.insert(Chain.getNode()).second)
17323   2.18M        continue;
17324   53.2M
17325   53.2M      switch (Chain.getOpcode()) {
17326   6.38M      case ISD::EntryToken:
17327   6.38M        // Entry token is ideal chain operand, but handled in FindBetterChain.
17328   6.38M        break;
17329   53.2M
17330   35.7M      case ISD::LOAD:
17331   35.7M      case ISD::STORE: {
17332   35.7M        // Get alias information for Chain.
17333   35.7M        bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
17334   5.44M            !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
17335   35.7M
17336   35.7M        // If chain is alias then stop here.
17337   35.7M        if (!(IsLoad && IsOpLoad) &&
17338   35.7M            isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
17339   6.62M          Aliases.push_back(Chain);
17340   35.7M        } else {
17341   29.1M          // Look further up the chain.
17342   29.1M          Chains.push_back(Chain.getOperand(0));
17343   29.1M          ++Depth;
17344   29.1M        }
17345   35.7M        break;
17346   35.7M      }
17347   35.7M
17348   6.98M      case ISD::TokenFactor:
17349   6.98M        // We have to check each of the operands of the token factor for "small"
17350   6.98M        // token factors, so we queue them up.  Adding the operands to the queue
17351   6.98M        // (stack) in reverse order maintains the original order and increases the
17352   6.98M        // likelihood that getNode will find a matching token factor (CSE.)
17353   6.98M        if (Chain.getNumOperands() > 16) {
17354   2.55M          Aliases.push_back(Chain);
17355   2.55M          break;
17356   2.55M        }
17357   18.3M        for (unsigned n = Chain.getNumOperands(); n;)
17358   13.9M          Chains.push_back(Chain.getOperand(--n));
17359   4.42M        ++Depth;
17360   4.42M        break;
17361   4.42M
17362    362k      case ISD::CopyFromReg:
17363    362k        // Forward past CopyFromReg.
17364    362k        Chains.push_back(Chain.getOperand(0));
17365    362k        ++Depth;
17366    362k        break;
17367   4.42M
17368   3.71M      default:
17369   3.71M        // For all other instructions we will just have to take what we can get.
17370   3.71M        Aliases.push_back(Chain);
17371   3.71M        break;
17372   56.8M      }
17373   56.8M    }
17374   17.9M  }
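GatherAllAliases is a depth-limited worklist traversal over chain operands with a visited set: aliasing memory nodes and unknown nodes are recorded as required dependences, non-aliasing memory nodes and CopyFromReg are walked through, TokenFactors fan the walk out, and hitting the depth limit falls back to the original chain. The sketch below mirrors only that traversal shape with a toy Node type and a hypothetical gatherAliases function; it is not the SelectionDAG API.

#include <unordered_set>
#include <vector>

// Toy chain node: a kind tag plus predecessor (chain-operand) links.
struct Node {
  enum Kind { Entry, Memory, TokenFactor, Other } K;
  std::vector<Node *> Preds;
  bool MayAlias;   // stands in for the isAlias() query against the start node
};

// Sketch of the traversal shape: pop a node, give up at the depth limit by
// keeping the starting chain, skip visited nodes, and either record the node
// as a dependence or keep walking up.
static void gatherAliases(Node *Start, unsigned MaxDepth,
                          std::vector<Node *> &Aliases) {
  std::vector<Node *> Worklist{Start};
  std::unordered_set<Node *> Visited;
  unsigned Depth = 0;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (Depth > MaxDepth) {            // give up: depend on the start chain
      Aliases.assign(1, Start);
      return;
    }
    if (!Visited.insert(N).second)
      continue;
    switch (N->K) {
    case Node::Entry:
      break;                           // ideal chain; nothing to record
    case Node::Memory:
      if (N->MayAlias) {
        Aliases.push_back(N);          // record the required dependence
      } else if (!N->Preds.empty()) {
        Worklist.push_back(N->Preds[0]);
        ++Depth;                       // walk through non-aliasing memory ops
      }
      break;
    case Node::TokenFactor:
      for (auto It = N->Preds.rbegin(); It != N->Preds.rend(); ++It)
        Worklist.push_back(*It);       // fan out over all operands
      ++Depth;
      break;
    case Node::Other:
      Aliases.push_back(N);            // conservatively keep the dependence
      break;
    }
  }
}

int main() {
  Node EntryTok{Node::Entry, {}, false};
  Node Store{Node::Memory, {&EntryTok}, true};
  Node Start{Node::TokenFactor, {&Store}, false};
  std::vector<Node *> Aliases;
  gatherAliases(&Start, 18, Aliases);
  return (Aliases.size() == 1 && Aliases[0] == &Store) ? 0 : 1;
}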
17375
17376            /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17377            /// (aliasing node.)
17378   17.9M  SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17379   17.9M    SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
17380   17.9M
17381   17.9M    // Accumulate all the aliases to this node.
17382   17.9M    GatherAllAliases(N, OldChain, Aliases);
17383   17.9M
17384   17.9M    // If no operands then chain to entry token.
17385   17.9M    if (Aliases.size() == 0)
17386   6.32M      return DAG.getEntryNode();
17387   11.5M
17388   11.5M    // If a single operand then chain to it.  We don't need to revisit it.
17389   11.5M    if (Aliases.size() == 1)
17390   10.5M      return Aliases[0];
17391   1.08M
17392   1.08M    // Construct a custom tailored token factor.
17393   1.08M    return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17394   1.08M  }
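FindBetterChain then collapses the gathered dependence list into a single chain value: the entry token when nothing aliases, the lone dependence when there is exactly one, and a fresh TokenFactor joining all of them otherwise. A minimal sketch of that selection step, using a toy Chain type and a hypothetical pickBetterChain helper rather than SelectionDAG nodes:

#include <memory>
#include <vector>

// Toy chain value: either the entry token, a plain existing dependence,
// or a synthetic join over several dependences (the TokenFactor analogue).
struct Chain {
  std::vector<const Chain *> Joined;   // empty for entry/plain chains
};

static const Chain EntryToken{};       // stands in for the entry node

// Sketch of the final selection: empty -> entry, one -> that dependence,
// otherwise build a join node over all required dependences.
static const Chain *pickBetterChain(const std::vector<const Chain *> &Aliases,
                                    std::vector<std::unique_ptr<Chain>> &Pool) {
  if (Aliases.empty())
    return &EntryToken;                // no ordering constraints remain
  if (Aliases.size() == 1)
    return Aliases.front();            // depend directly on the one alias
  Pool.push_back(std::make_unique<Chain>(Chain{Aliases}));
  return Pool.back().get();            // synthetic join over all of them
}

int main() {
  Chain A, B;
  std::vector<std::unique_ptr<Chain>> Pool;
  const Chain *C0 = pickBetterChain({}, Pool);        // entry token
  const Chain *C1 = pickBetterChain({&A}, Pool);      // the single alias
  const Chain *C2 = pickBetterChain({&A, &B}, Pool);  // synthetic join
  return (C0 == &EntryToken && C1 == &A && C2->Joined.size() == 2) ? 0 : 1;
}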
17395
17396            // This function tries to collect a bunch of potentially interesting
17397            // nodes to improve the chains of, all at once. This might seem
17398            // redundant, as this function gets called when visiting every store
17399            // node, so why not let the work be done on each store as it's visited?
17400            //
17401            // I believe this is mainly important because MergeConsecutiveStores
17402            // is unable to deal with merging stores of different sizes, so unless
17403            // we improve the chains of all the potential candidates up-front
17404            // before running MergeConsecutiveStores, it might only see some of
17405            // the nodes that will eventually be candidates, and then not be able
17406            // to go from a partially-merged state to the desired final
17407            // fully-merged state.
17408   8.88M  bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
17409   8.88M    // This holds the base pointer, index, and the offset in bytes from the base
17410   8.88M    // pointer.
17411   8.88M    BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
17412   8.88M
17413   8.88M    // We must have a base and an offset.
17414   8.88M    if (!BasePtr.getBase().getNode())
17415       0      return false;
17416   8.88M
17417   8.88M    // Do not handle stores to undef base pointers.
17418   8.88M    if (BasePtr.getBase().isUndef())
17419   10.9k      return false;
17420   8.87M
17421   8.87M    SmallVector<StoreSDNode *, 8> ChainedStores;
17422   8.87M    ChainedStores.push_back(St);
17423   8.87M
17424   8.87M    // Walk up the chain and look for nodes with offsets from the same
17425   8.87M    // base pointer. Stop when reaching an instruction with a different kind
17426   8.87M    // or instruction which has a different base pointer.
17427   8.87M    StoreSDNode *Index = St;
17428   20.3M    while (Index) {
17429   11.8M      // If the chain has more than one use, then we can't reorder the mem ops.
17430   11.8M      if (Index != St && !SDValue(Index, 0)->hasOneUse())
17431    206k        break;
17432   11.6M
17433   11.6M      if (Index->isVolatile() || Index->isIndexed())
17434   37.1k        break;
17435   11.6M
17436   11.6M      // Find the base pointer and offset for this memory node.
17437   11.6M      BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
17438   11.6M
17439   11.6M      // Check that the base pointer is the same as the original one.
17440   11.6M      if (!BasePtr.equalBaseIndex(Ptr, DAG))
17441    205k        break;
17442   11.4M
17443   11.4M      // Walk up the chain to find the next store node, ignoring any
17444   11.4M      // intermediate loads. Any other kind of node will halt the loop.
17445   11.4M      SDNode *NextInChain = Index->getChain().getNode();
17446   13.4M      while (true) {
17447   13.4M        if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
17448   3.00M          // We found a store node. Use it for the next iteration.
17449   3.00M          if (STn->isVolatile() || STn->isIndexed()) {
17450   1.68k            Index = nullptr;
17451   1.68k            break;
17452   1.68k          }
17453   3.00M          ChainedStores.push_back(STn);
17454   3.00M          Index = STn;
17455   3.00M          break;
17456   10.4M        } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
17457   2.04M          NextInChain = Ldn->getChain().getNode();
17458   2.04M          continue;
17459       0        } else {
17460   8.42M          Index = nullptr;
17461   8.42M          break;
17462   8.42M        }
17463   13.4M      } // end while
17464   11.8M    }
17465   8.87M
17466   8.87M    // At this point, ChainedStores lists all of the Store nodes
17467   8.87M    // reachable by iterating up through chain nodes matching the above
17468   8.87M    // conditions.  For each such store identified, try to find an
17469   8.87M    // earlier chain to attach the store to which won't violate the
17470   8.87M    // required ordering.
17471   8.87M    bool MadeChangeToSt = false;
17472   8.87M    SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
17473   8.87M
17474   11.8M    for (StoreSDNode *ChainedStore : ChainedStores) {
17475   11.8M      SDValue Chain = ChainedStore->getChain();
17476   11.8M      SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
17477   11.8M
17478   11.8M      if (Chain != BetterChain) {
17479    941k        if (ChainedStore == St)
17480    445k          MadeChangeToSt = true;
17481    941k        BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
17482    941k      }
17483   11.8M    }
17484   8.87M
17485   8.87M    // Do all replacements after finding the replacements to make to avoid making
17486   8.87M    // the chains more complicated by introducing new TokenFactors.
17487   8.87M    for (auto Replacement : BetterChains)
17488    941k      replaceStoreChain(Replacement.first, Replacement.second);
17489   8.88M
17490   8.88M    return MadeChangeToSt;
17491   8.88M  }
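Two ideas in findBetterNeighborChains are worth isolating: the upward walk that collects the run of chained stores while skipping intermediate loads, and the gather-then-apply pattern that records every (store, better chain) pair before performing any replacement. The sketch below mirrors only the shape of the walk, with a toy MemNode type and a hypothetical collectChainedStores helper; it is not the LLVM implementation.

#include <vector>

// Toy memory node: a kind tag, a base-pointer identity, and the node this
// one is chained after.
struct MemNode {
  enum Kind { Store, Load, Other } K;
  int Base = 0;            // stands in for the BaseIndexOffset base
  MemNode *ChainedAfter = nullptr;
};

// Sketch of the upward walk: starting from St, follow chain links, collect
// the next store each time, skip over intermediate loads, and stop on any
// other kind of node or when the base pointer no longer matches.
static std::vector<MemNode *> collectChainedStores(MemNode *St) {
  std::vector<MemNode *> ChainedStores{St};
  MemNode *Index = St;
  while (Index) {
    // Stop once the current store no longer uses the original base pointer.
    if (Index->Base != St->Base)
      break;
    MemNode *Next = Index->ChainedAfter;
    Index = nullptr;
    while (Next) {
      if (Next->K == MemNode::Store) {
        ChainedStores.push_back(Next);
        Index = Next;                  // continue the walk from this store
        break;
      }
      if (Next->K == MemNode::Load) {  // ignore intermediate loads
        Next = Next->ChainedAfter;
        continue;
      }
      break;                           // any other node halts the walk
    }
  }
  return ChainedStores;
}

int main() {
  MemNode End{MemNode::Other, 0, nullptr};
  MemNode S2{MemNode::Store, 7, &End};
  MemNode L1{MemNode::Load, 7, &S2};
  MemNode S1{MemNode::Store, 7, &L1};
  return collectChainedStores(&S1).size() == 2 ? 0 : 1;
}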
17492
17493            /// This is the entry point for the file.
17494            void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17495   7.42M                               CodeGenOpt::Level OptLevel) {
17496   7.42M    /// This is the main entry point to this class.
17497   7.42M    DAGCombiner(*this, AA, OptLevel).Run(Level);
17498   7.42M  }