Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10
// both before and after the DAG is legalized.
11
//
12
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13
// primarily intended to handle simplification opportunities that are implicit
14
// in the LLVM IR and exposed by the various codegen lowering phases.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/ADT/APFloat.h"
19
#include "llvm/ADT/APInt.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/DenseMap.h"
22
#include "llvm/ADT/IntervalMap.h"
23
#include "llvm/ADT/None.h"
24
#include "llvm/ADT/Optional.h"
25
#include "llvm/ADT/STLExtras.h"
26
#include "llvm/ADT/SetVector.h"
27
#include "llvm/ADT/SmallBitVector.h"
28
#include "llvm/ADT/SmallPtrSet.h"
29
#include "llvm/ADT/SmallSet.h"
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/AliasAnalysis.h"
33
#include "llvm/Analysis/MemoryLocation.h"
34
#include "llvm/CodeGen/DAGCombine.h"
35
#include "llvm/CodeGen/ISDOpcodes.h"
36
#include "llvm/CodeGen/MachineFrameInfo.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineMemOperand.h"
39
#include "llvm/CodeGen/RuntimeLibcalls.h"
40
#include "llvm/CodeGen/SelectionDAG.h"
41
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42
#include "llvm/CodeGen/SelectionDAGNodes.h"
43
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44
#include "llvm/CodeGen/TargetLowering.h"
45
#include "llvm/CodeGen/TargetRegisterInfo.h"
46
#include "llvm/CodeGen/TargetSubtargetInfo.h"
47
#include "llvm/CodeGen/ValueTypes.h"
48
#include "llvm/IR/Attributes.h"
49
#include "llvm/IR/Constant.h"
50
#include "llvm/IR/DataLayout.h"
51
#include "llvm/IR/DerivedTypes.h"
52
#include "llvm/IR/Function.h"
53
#include "llvm/IR/LLVMContext.h"
54
#include "llvm/IR/Metadata.h"
55
#include "llvm/Support/Casting.h"
56
#include "llvm/Support/CodeGen.h"
57
#include "llvm/Support/CommandLine.h"
58
#include "llvm/Support/Compiler.h"
59
#include "llvm/Support/Debug.h"
60
#include "llvm/Support/ErrorHandling.h"
61
#include "llvm/Support/KnownBits.h"
62
#include "llvm/Support/MachineValueType.h"
63
#include "llvm/Support/MathExtras.h"
64
#include "llvm/Support/raw_ostream.h"
65
#include "llvm/Target/TargetMachine.h"
66
#include "llvm/Target/TargetOptions.h"
67
#include <algorithm>
68
#include <cassert>
69
#include <cstdint>
70
#include <functional>
71
#include <iterator>
72
#include <string>
73
#include <tuple>
74
#include <utility>
75
76
using namespace llvm;
77
78
#define DEBUG_TYPE "dagcombine"
79
80
STATISTIC(NodesCombined   , "Number of dag nodes combined");
81
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
84
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85
STATISTIC(SlicedLoads, "Number of load sliced");
86
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88
static cl::opt<bool>
89
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92
static cl::opt<bool>
93
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94
        cl::desc("Enable DAG combiner's use of TBAA"));
95
96
#ifndef NDEBUG
97
static cl::opt<std::string>
98
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99
                   cl::desc("Only use DAG-combiner alias analysis in this"
100
                            " function"));
101
#endif
102
103
/// Hidden option to stress test load slicing, i.e., when this option
104
/// is enabled, load slicing bypasses most of its profitability guards.
105
static cl::opt<bool>
106
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107
                  cl::desc("Bypass the profitability model of load slicing"),
108
                  cl::init(false));
109
110
static cl::opt<bool>
111
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112
                    cl::desc("DAG combiner may split indexing from loads"));
113
114
static cl::opt<unsigned> TokenFactorInlineLimit(
115
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
116
    cl::desc("Limit the number of operands to inline for Token Factors"));
117
118
namespace {
119
120
  class DAGCombiner {
121
    SelectionDAG &DAG;
122
    const TargetLowering &TLI;
123
    CombineLevel Level;
124
    CodeGenOpt::Level OptLevel;
125
    bool LegalOperations = false;
126
    bool LegalTypes = false;
127
    bool ForCodeSize;
128
129
    /// Worklist of all of the nodes that need to be simplified.
130
    ///
131
    /// This must behave as a stack -- new nodes to process are pushed onto the
132
    /// back and when processing we pop off of the back.
133
    ///
134
    /// The worklist will not contain duplicates but may contain null entries
135
    /// due to nodes being deleted from the underlying DAG.
136
    SmallVector<SDNode *, 64> Worklist;
137
138
    /// Mapping from an SDNode to its position on the worklist.
139
    ///
140
    /// This is used to find and remove nodes from the worklist (by nulling
141
    /// them) when they are deleted from the underlying DAG. It relies on
142
    /// stable indices of nodes within the worklist.
143
    DenseMap<SDNode *, unsigned> WorklistMap;
144
    /// This records all nodes attempted to add to the worklist since we
145
    /// considered a new worklist entry. As we do not add duplicate nodes
146
    /// in the worklist, this is different from the tail of the worklist.
147
    SmallSetVector<SDNode *, 32> PruningList;
148
149
    /// Set of nodes which have been combined (at least once).
150
    ///
151
    /// This is used to allow us to reliably add any operands of a DAG node
152
    /// which have not yet been combined to the worklist.
153
    SmallPtrSet<SDNode *, 32> CombinedNodes;
154
155
    // AA - Used for DAG load/store alias analysis.
156
    AliasAnalysis *AA;
157
158
    /// When an instruction is simplified, add all users of the instruction to
159
    /// the work lists because they might get more simplified now.
160
3.42M
    void AddUsersToWorklist(SDNode *N) {
161
3.42M
      for (SDNode *Node : N->uses())
162
6.44M
        AddToWorklist(Node);
163
3.42M
    }
164
165
    // Prune potentially dangling nodes. This is called after
166
    // any visit to a node, but should also be called during a visit after any
167
    // failed combine which may have created a DAG node.
168
72.3M
    void clearAddedDanglingWorklistEntries() {
169
72.3M
      // Check any nodes added to the worklist to see if they are prunable.
170
244M
      while (!PruningList.empty()) {
171
172M
        auto *N = PruningList.pop_back_val();
172
172M
        if (N->use_empty())
173
2.37M
          recursivelyDeleteUnusedNodes(N);
174
172M
      }
175
72.3M
    }
176
177
72.3M
    /// Pop the next node to process off the back of the worklist.
    ///
    /// Unused pruning candidates are deleted first. Null slots (left behind by
    /// removeFromWorklist) are skipped. Returns nullptr once the worklist has
    /// been exhausted.
    SDNode *getNextWorklistEntry() {
      // Get rid of nodes that are no longer in use before picking any work.
      clearAddedDanglingWorklistEntries();

      // Entries are kept in order, but some of them may have been nulled out.
      SDNode *Next = nullptr;
      while (!Worklist.empty()) {
        Next = Worklist.pop_back_val();
        if (Next)
          break;
      }
      if (!Next)
        return nullptr;

      // A live worklist entry must always have a matching index in the map.
      bool GoodWorklistEntry = WorklistMap.erase(Next);
      (void)GoodWorklistEntry;
      assert(GoodWorklistEntry &&
             "Found a worklist entry without a corresponding map entry!");
      return Next;
    }
195
196
    /// Call the node-specific routine that folds each particular type of node.
197
    SDValue visit(SDNode *N);
198
199
  public:
200
    /// Build a combiner for \p D, starting at the BeforeLegalizeTypes level.
    ///
    /// \param D  DAG to operate on; its TargetLowering is cached in TLI.
    /// \param AA alias analysis handle stored for later load/store queries.
    /// \param OL optimization level stored in OptLevel.
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

      // Record the bit width of the widest legal simple value type (MVT::Other
      // excluded) as MaximumLegalStoreInBits.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes()) {
        if (!EVT(VT).isSimple() || VT == MVT::Other)
          continue;
        if (!TLI.isTypeLegal(EVT(VT)))
          continue;
        const unsigned Bits = VT.getSizeInBits();
        if (Bits >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = Bits;
      }
    }
212
213
194M
    /// Remember \p N as a candidate for deletion. PruningList is a set
    /// vector, so the insert result is deliberately ignored.
    void ConsiderForPruning(SDNode *N) {
      (void)PruningList.insert(N);
    }
217
218
    /// Add to the worklist making sure its instance is at the back (next to be
219
    /// processed.)
220
189M
    void AddToWorklist(SDNode *N) {
221
189M
      assert(N->getOpcode() != ISD::DELETED_NODE &&
222
189M
             "Deleted Node added to Worklist");
223
189M
224
189M
      // Skip handle nodes as they can't usefully be combined and confuse the
225
189M
      // zero-use deletion strategy.
226
189M
      if (N->getOpcode() == ISD::HANDLENODE)
227
22.6k
        return;
228
189M
229
189M
      ConsiderForPruning(N);
230
189M
231
189M
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
232
75.5M
        Worklist.push_back(N);
233
189M
    }
234
235
    /// Remove all instances of N from the worklist.
236
9.38M
    void removeFromWorklist(SDNode *N) {
237
9.38M
      CombinedNodes.erase(N);
238
9.38M
      PruningList.remove(N);
239
9.38M
240
9.38M
      auto It = WorklistMap.find(N);
241
9.38M
      if (It == WorklistMap.end())
242
3.39M
        return; // Not in the worklist.
243
5.98M
244
5.98M
      // Null out the entry rather than erasing it to avoid a linear operation.
245
5.98M
      Worklist[It->second] = nullptr;
246
5.98M
      WorklistMap.erase(It);
247
5.98M
    }
248
249
    void deleteAndRecombine(SDNode *N);
250
    bool recursivelyDeleteUnusedNodes(SDNode *N);
251
252
    /// Replaces all uses of the results of one DAG node with new values.
253
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
254
                      bool AddTo = true);
255
256
    /// Replaces all uses of the results of one DAG node with new values.
257
374k
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
258
374k
      return CombineTo(N, &Res, 1, AddTo);
259
374k
    }
260
261
    /// Replaces all uses of the results of one DAG node with new values.
262
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
263
202k
                      bool AddTo = true) {
264
202k
      SDValue To[] = { Res0, Res1 };
265
202k
      return CombineTo(N, To, 2, AddTo);
266
202k
    }
267
268
    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
269
270
  private:
271
    unsigned MaximumLegalStoreInBits;
272
273
    /// Check the specified integer node value to see if it can be simplified or
274
    /// if things it uses can be simplified by bit propagation.
275
    /// If so, return true.
276
5.67M
    bool SimplifyDemandedBits(SDValue Op) {
277
5.67M
      unsigned BitWidth = Op.getScalarValueSizeInBits();
278
5.67M
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
279
5.67M
      return SimplifyDemandedBits(Op, DemandedBits);
280
5.67M
    }
281
282
5.97M
    /// Demanded-bits simplification of \p Op with every element demanded.
    /// The element mask has one bit per vector element, or a single bit when
    /// \p Op is not a vector.
    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      const EVT OpVT = Op.getValueType();
      unsigned ElementCount = 1;
      if (OpVT.isVector())
        ElementCount = OpVT.getVectorNumElements();
      return SimplifyDemandedBits(Op, DemandedBits,
                                  APInt::getAllOnesValue(ElementCount));
    }
288
289
    /// Check the specified vector node value to see if it can be simplified or
290
    /// if things it uses can be simplified as it only uses some of the
291
    /// elements. If so, return true.
292
329k
    bool SimplifyDemandedVectorElts(SDValue Op) {
293
329k
      unsigned NumElts = Op.getValueType().getVectorNumElements();
294
329k
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
295
329k
      return SimplifyDemandedVectorElts(Op, DemandedElts);
296
329k
    }
297
298
    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
299
                              const APInt &DemandedElts);
300
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
301
                                    bool AssumeSingleUse = false);
302
303
    bool CombineToPreIndexedLoadStore(SDNode *N);
304
    bool CombineToPostIndexedLoadStore(SDNode *N);
305
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
306
    bool SliceUpLoad(SDNode *N);
307
308
    // Scalars have size 0 to distinguish from singleton vectors.
309
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
310
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
311
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
312
313
    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
314
    ///   load.
315
    ///
316
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
317
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
318
    /// \param EltNo index of the vector element to load.
319
    /// \param OriginalLoad load that EVE came from to be replaced.
320
    /// \returns EVE on success SDValue() on failure.
321
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
322
                                         SDValue EltNo,
323
                                         LoadSDNode *OriginalLoad);
324
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
325
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
326
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
327
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
328
    SDValue PromoteIntBinOp(SDValue Op);
329
    SDValue PromoteIntShiftOp(SDValue Op);
330
    SDValue PromoteExtend(SDValue Op);
331
    bool PromoteLoad(SDValue Op);
332
333
    /// Call the node-specific routine that knows how to fold each
334
    /// particular type of node. If that doesn't do anything, try the
335
    /// target-specific DAG combines.
336
    SDValue combine(SDNode *N);
337
338
    // Visitation implementation - Implement dag node combining for different
339
    // node types.  The semantics are as follows:
340
    // Return Value:
341
    //   SDValue.getNode() == 0 - No change was made
342
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
343
    //   otherwise              - N should be replaced by the returned Operand.
344
    //
345
    SDValue visitTokenFactor(SDNode *N);
346
    SDValue visitMERGE_VALUES(SDNode *N);
347
    SDValue visitADD(SDNode *N);
348
    SDValue visitADDLike(SDNode *N);
349
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
350
    SDValue visitSUB(SDNode *N);
351
    SDValue visitADDSAT(SDNode *N);
352
    SDValue visitSUBSAT(SDNode *N);
353
    SDValue visitADDC(SDNode *N);
354
    SDValue visitADDO(SDNode *N);
355
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
356
    SDValue visitSUBC(SDNode *N);
357
    SDValue visitSUBO(SDNode *N);
358
    SDValue visitADDE(SDNode *N);
359
    SDValue visitADDCARRY(SDNode *N);
360
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
361
    SDValue visitSUBE(SDNode *N);
362
    SDValue visitSUBCARRY(SDNode *N);
363
    SDValue visitMUL(SDNode *N);
364
    SDValue useDivRem(SDNode *N);
365
    SDValue visitSDIV(SDNode *N);
366
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
367
    SDValue visitUDIV(SDNode *N);
368
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
369
    SDValue visitREM(SDNode *N);
370
    SDValue visitMULHU(SDNode *N);
371
    SDValue visitMULHS(SDNode *N);
372
    SDValue visitSMUL_LOHI(SDNode *N);
373
    SDValue visitUMUL_LOHI(SDNode *N);
374
    SDValue visitMULO(SDNode *N);
375
    SDValue visitIMINMAX(SDNode *N);
376
    SDValue visitAND(SDNode *N);
377
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
378
    SDValue visitOR(SDNode *N);
379
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
380
    SDValue visitXOR(SDNode *N);
381
    SDValue SimplifyVBinOp(SDNode *N);
382
    SDValue visitSHL(SDNode *N);
383
    SDValue visitSRA(SDNode *N);
384
    SDValue visitSRL(SDNode *N);
385
    SDValue visitFunnelShift(SDNode *N);
386
    SDValue visitRotate(SDNode *N);
387
    SDValue visitABS(SDNode *N);
388
    SDValue visitBSWAP(SDNode *N);
389
    SDValue visitBITREVERSE(SDNode *N);
390
    SDValue visitCTLZ(SDNode *N);
391
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
392
    SDValue visitCTTZ(SDNode *N);
393
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
394
    SDValue visitCTPOP(SDNode *N);
395
    SDValue visitSELECT(SDNode *N);
396
    SDValue visitVSELECT(SDNode *N);
397
    SDValue visitSELECT_CC(SDNode *N);
398
    SDValue visitSETCC(SDNode *N);
399
    SDValue visitSETCCCARRY(SDNode *N);
400
    SDValue visitSIGN_EXTEND(SDNode *N);
401
    SDValue visitZERO_EXTEND(SDNode *N);
402
    SDValue visitANY_EXTEND(SDNode *N);
403
    SDValue visitAssertExt(SDNode *N);
404
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
405
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
406
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
407
    SDValue visitTRUNCATE(SDNode *N);
408
    SDValue visitBITCAST(SDNode *N);
409
    SDValue visitBUILD_PAIR(SDNode *N);
410
    SDValue visitFADD(SDNode *N);
411
    SDValue visitFSUB(SDNode *N);
412
    SDValue visitFMUL(SDNode *N);
413
    SDValue visitFMA(SDNode *N);
414
    SDValue visitFDIV(SDNode *N);
415
    SDValue visitFREM(SDNode *N);
416
    SDValue visitFSQRT(SDNode *N);
417
    SDValue visitFCOPYSIGN(SDNode *N);
418
    SDValue visitFPOW(SDNode *N);
419
    SDValue visitSINT_TO_FP(SDNode *N);
420
    SDValue visitUINT_TO_FP(SDNode *N);
421
    SDValue visitFP_TO_SINT(SDNode *N);
422
    SDValue visitFP_TO_UINT(SDNode *N);
423
    SDValue visitFP_ROUND(SDNode *N);
424
    SDValue visitFP_ROUND_INREG(SDNode *N);
425
    SDValue visitFP_EXTEND(SDNode *N);
426
    SDValue visitFNEG(SDNode *N);
427
    SDValue visitFABS(SDNode *N);
428
    SDValue visitFCEIL(SDNode *N);
429
    SDValue visitFTRUNC(SDNode *N);
430
    SDValue visitFFLOOR(SDNode *N);
431
    SDValue visitFMINNUM(SDNode *N);
432
    SDValue visitFMAXNUM(SDNode *N);
433
    SDValue visitFMINIMUM(SDNode *N);
434
    SDValue visitFMAXIMUM(SDNode *N);
435
    SDValue visitBRCOND(SDNode *N);
436
    SDValue visitBR_CC(SDNode *N);
437
    SDValue visitLOAD(SDNode *N);
438
439
    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
440
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
441
442
    SDValue visitSTORE(SDNode *N);
443
    SDValue visitLIFETIME_END(SDNode *N);
444
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
445
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
446
    SDValue visitBUILD_VECTOR(SDNode *N);
447
    SDValue visitCONCAT_VECTORS(SDNode *N);
448
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
449
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
450
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
451
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
452
    SDValue visitMLOAD(SDNode *N);
453
    SDValue visitMSTORE(SDNode *N);
454
    SDValue visitMGATHER(SDNode *N);
455
    SDValue visitMSCATTER(SDNode *N);
456
    SDValue visitFP_TO_FP16(SDNode *N);
457
    SDValue visitFP16_TO_FP(SDNode *N);
458
    SDValue visitVECREDUCE(SDNode *N);
459
460
    SDValue visitFADDForFMACombine(SDNode *N);
461
    SDValue visitFSUBForFMACombine(SDNode *N);
462
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);
463
464
    SDValue XformToShuffleWithZero(SDNode *N);
465
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
466
                                                    const SDLoc &DL, SDValue N0,
467
                                                    SDValue N1);
468
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
469
                                      SDValue N1);
470
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
471
                           SDValue N1, SDNodeFlags Flags);
472
473
    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
474
475
    SDValue foldSelectOfConstants(SDNode *N);
476
    SDValue foldVSelectOfConstants(SDNode *N);
477
    SDValue foldBinOpIntoSelect(SDNode *BO);
478
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
479
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
480
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
481
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
482
                             SDValue N2, SDValue N3, ISD::CondCode CC,
483
                             bool NotExtCompare = false);
484
    SDValue convertSelectOfFPConstantsToLoadOffset(
485
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
486
        ISD::CondCode CC);
487
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
488
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
489
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
490
                              const SDLoc &DL);
491
    SDValue unfoldMaskedMerge(SDNode *N);
492
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
493
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
494
                          const SDLoc &DL, bool foldBooleans);
495
    SDValue rebuildSetCC(SDValue N);
496
497
    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
498
                           SDValue &CC) const;
499
    bool isOneUseSetCC(SDValue N) const;
500
501
    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
502
                                         unsigned HiOp);
503
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
504
    SDValue CombineExtLoad(SDNode *N);
505
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
506
    SDValue combineRepeatedFPDivisors(SDNode *N);
507
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
508
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
509
    SDValue BuildSDIV(SDNode *N);
510
    SDValue BuildSDIVPow2(SDNode *N);
511
    SDValue BuildUDIV(SDNode *N);
512
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
513
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
514
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
515
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
516
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
517
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
518
                                SDNodeFlags Flags, bool Reciprocal);
519
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
520
                                SDNodeFlags Flags, bool Reciprocal);
521
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
522
                               bool DemandHighBits = true);
523
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
524
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
525
                              SDValue InnerPos, SDValue InnerNeg,
526
                              unsigned PosOpcode, unsigned NegOpcode,
527
                              const SDLoc &DL);
528
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
529
    SDValue MatchLoadCombine(SDNode *N);
530
    SDValue MatchStoreCombine(StoreSDNode *N);
531
    SDValue ReduceLoadWidth(SDNode *N);
532
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
533
    SDValue splitMergedValStore(StoreSDNode *ST);
534
    SDValue TransformFPLoadStorePair(SDNode *N);
535
    SDValue convertBuildVecZextToZext(SDNode *N);
536
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
537
    SDValue reduceBuildVecToShuffle(SDNode *N);
538
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
539
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
540
                                  SDValue VecIn2, unsigned LeftIdx,
541
                                  bool DidSplitVec);
542
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
543
544
    /// Walk up chain skipping non-aliasing memory nodes,
545
    /// looking for aliasing nodes and adding them to the Aliases vector.
546
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
547
                          SmallVectorImpl<SDValue> &Aliases);
548
549
    /// Return true if there is any possibility that the two addresses overlap.
550
    bool isAlias(SDNode *Op0, SDNode *Op1) const;
551
552
    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
553
    /// chain (aliasing node.)
554
    SDValue FindBetterChain(SDNode *N, SDValue Chain);
555
556
    /// Try to replace a store and any possibly adjacent stores on
557
    /// consecutive chains with better chains. Return true only if St is
558
    /// replaced.
559
    ///
560
    /// Notice that other chains may still be replaced even if the function
561
    /// returns false.
562
    bool findBetterNeighborChains(StoreSDNode *St);
563
564
    // Helper for findBetterNeighborChains. Walk up store chain add additional
565
    // chained stores that do not overlap and can be parallelized.
566
    bool parallelizeChainedStores(StoreSDNode *St);
567
568
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
569
    /// is located in a sequence of memory operations connected by a chain.
570
    struct MemOpLink {
571
      // Ptr to the mem node.
572
      LSBaseSDNode *MemNode;
573
574
      // Offset from the base ptr.
575
      int64_t OffsetFromBase;
576
577
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
578
1.85M
          : MemNode(N), OffsetFromBase(Offset) {}
579
    };
580
581
    /// This is a helper function for visitMUL to check the profitability
582
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
583
    /// MulNode is the original multiply, AddNode is (add x, c1),
584
    /// and ConstNode is c2.
585
    bool isMulAddWithConstProfitable(SDNode *MulNode,
586
                                     SDValue &AddNode,
587
                                     SDValue &ConstNode);
588
589
    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
590
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
591
    /// the type of the loaded value to be extended.
592
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
593
                          EVT LoadResultTy, EVT &ExtVT);
594
595
    /// Helper function to calculate whether the given Load/Store can have its
596
    /// width reduced to ExtVT.
597
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
598
                           EVT &MemVT, unsigned ShAmt = 0);
599
600
    /// Used by BackwardsPropagateMask to find suitable loads.
601
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
602
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
603
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
604
    /// Attempt to propagate a given AND node back to load leaves so that they
605
    /// can be combined into narrow loads.
606
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
607
608
    /// Helper function for MergeConsecutiveStores which merges the
609
    /// component store chains.
610
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
611
                                unsigned NumStores);
612
613
    /// This is a helper function for MergeConsecutiveStores. When the
614
    /// source elements of the consecutive stores are all constants or
615
    /// all extracted vector elements, try to merge them into one
616
    /// larger store introducing bitcasts if necessary.  \return True
617
    /// if a merged store was created.
618
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
619
                                         EVT MemVT, unsigned NumStores,
620
                                         bool IsConstantSrc, bool UseVector,
621
                                         bool UseTrunc);
622
623
    /// This is a helper function for MergeConsecutiveStores. Stores
624
    /// that potentially may be merged with St are placed in
625
    /// StoreNodes. RootNode is a chain predecessor to all store
626
    /// candidates.
627
    void getStoreMergeCandidates(StoreSDNode *St,
628
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
629
                                 SDNode *&Root);
630
631
    /// Helper function for MergeConsecutiveStores. Checks if
632
    /// candidate stores have indirect dependency through their
633
    /// operands. RootNode is the predecessor to all stores calculated
634
    /// by getStoreMergeCandidates and is used to prune the dependency check.
635
    /// \return True if safe to merge.
636
    bool checkMergeStoreCandidatesForDependencies(
637
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
638
        SDNode *RootNode);
639
640
    /// Merge consecutive store operations into a wide store.
641
    /// This optimization uses wide integers or vectors when possible.
642
    /// \return number of stores that were merged into a merged store (the
643
    /// affected nodes are stored as a prefix in \p StoreNodes).
644
    bool MergeConsecutiveStores(StoreSDNode *St);
645
646
    /// Try to transform a truncation where C is a constant:
647
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
648
    ///
649
    /// \p N needs to be a truncation and its first operand an AND. Other
650
    /// requirements are checked by the function (e.g. that trunc is
651
    /// single-use) and if missed an empty SDValue is returned.
652
    SDValue distributeTruncateThroughAnd(SDNode *N);
653
654
    /// Helper function to determine whether the target supports operation
655
    /// given by \p Opcode for type \p VT, that is, whether the operation
656
    /// is legal or custom before legalizing operations, and whether is
657
    /// legal (but not custom) after legalization.
658
613k
    bool hasOperation(unsigned Opcode, EVT VT) {
659
613k
      if (LegalOperations)
660
366k
        return TLI.isOperationLegal(Opcode, VT);
661
246k
      return TLI.isOperationLegalOrCustom(Opcode, VT);
662
246k
    }
663
664
  public:
665
    /// Runs the dag combiner on all nodes in the work list
666
    void Run(CombineLevel AtLevel);
667
668
73.4M
    /// Accessor for the SelectionDAG this combiner operates on.
    SelectionDAG &getDAG() const {
      return DAG;
    }
669
670
    /// Returns a type large enough to hold any valid shift amount - before type
671
    /// legalization these can be huge.
672
12.8k
    EVT getShiftAmountTy(EVT LHSTy) {
673
12.8k
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
674
12.8k
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
675
12.8k
    }
676
677
    /// This method returns true if we are running before type legalization or
678
    /// if the specified VT is legal.
679
560k
    bool isTypeLegal(const EVT &VT) {
680
560k
      if (!LegalTypes) 
return true149k
;
681
411k
      return TLI.isTypeLegal(VT);
682
411k
    }
683
684
    /// Convenience wrapper around TargetLowering::getSetCCResultType
685
739k
    EVT getSetCCResultType(EVT VT) const {
686
739k
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
687
739k
    }
688
689
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
690
                         SDValue OrigLoad, SDValue ExtLoad,
691
                         ISD::NodeType ExtType);
692
  };
693
694
/// This class is a DAGUpdateListener that removes any deleted
695
/// nodes from the worklist.
696
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
697
  DAGCombiner &DC;
698
699
public:
700
  explicit WorklistRemover(DAGCombiner &dc)
701
70.5M
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
702
703
23.4k
  void NodeDeleted(SDNode *N, SDNode *E) override {
704
23.4k
    DC.removeFromWorklist(N);
705
23.4k
  }
706
};
707
708
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
709
  DAGCombiner &DC;
710
711
public:
712
  explicit WorklistInserter(DAGCombiner &dc)
713
2.88M
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
714
715
  // FIXME: Ideally we could add N to the worklist, but this causes exponential
716
  //        compile time costs in large DAGs, e.g. Halide.
717
4.90M
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
718
};
719
720
} // end anonymous namespace
721
722
//===----------------------------------------------------------------------===//
723
//  TargetLowering::DAGCombinerInfo implementation
724
//===----------------------------------------------------------------------===//
725
726
15.8k
/// Forward to DAGCombiner::AddToWorklist on the combiner stored in DC.
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  auto *Combiner = static_cast<DAGCombiner *>(DC);
  Combiner->AddToWorklist(N);
}
729
730
/// Forward to DAGCombiner::CombineTo, replacing the results of \p N with the
/// values in \p To.
///
/// The buffer pointer is obtained with ArrayRef::data() rather than
/// `&To[0]`, so an empty \p To no longer indexes element 0 here; the
/// size/value-count mismatch is instead diagnosed by the callee's own
/// "Broken CombineTo call!" assertion.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  auto *Combiner = static_cast<DAGCombiner *>(DC);
  return Combiner->CombineTo(N, To.data(), To.size(), AddTo);
}
734
735
/// Forward to the single-result DAGCombiner::CombineTo overload on the
/// combiner stored in DC.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  auto *Combiner = static_cast<DAGCombiner *>(DC);
  return Combiner->CombineTo(N, Res, AddTo);
}
739
740
/// Forward to the two-result DAGCombiner::CombineTo overload on the
/// combiner stored in DC.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  auto *Combiner = static_cast<DAGCombiner *>(DC);
  return Combiner->CombineTo(N, Res0, Res1, AddTo);
}
744
745
void TargetLowering::DAGCombinerInfo::
746
4.01k
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
747
4.01k
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
748
4.01k
}
749
750
//===----------------------------------------------------------------------===//
751
// Helper Functions
752
//===----------------------------------------------------------------------===//
753
754
1.08M
/// Remove \p N from the worklist, queue the operands that may have become
/// dead or further simplifiable, and then delete \p N from the DAG.
void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // Operands used only by N are about to become dead; revisit them so dead
  // nodes are deleted recursively.
  for (const SDValue &Operand : N->ops()) {
    SDNode *OperandNode = Operand.getNode();
    // A multi-result operand may have one of its results become dead, which
    // can enable further simplification (e.g. splitting the index arithmetic
    // off an indexed load).
    const bool WorthRevisiting =
        OperandNode->hasOneUse() || OperandNode->getNumValues() > 1;
    if (WorthRevisiting)
      AddToWorklist(OperandNode);
  }

  DAG.DeleteNode(N);
}
768
769
/// Return 1 if we can compute the negated form of the specified expression for
770
/// the same cost as the expression itself, or 2 if we can compute the negated
771
/// form more cheaply than the expression itself.
772
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
773
                               const TargetLowering &TLI,
774
                               const TargetOptions *Options,
775
                               bool ForCodeSize,
776
448k
                               unsigned Depth = 0) {
777
448k
  // fneg is removable even if it has multiple uses.
778
448k
  if (Op.getOpcode() == ISD::FNEG)
779
1.01k
    return 2;
780
447k
781
447k
  // Don't allow anything with multiple uses unless we know it is free.
782
447k
  EVT VT = Op.getValueType();
783
447k
  const SDNodeFlags Flags = Op->getFlags();
784
447k
  if (!Op.hasOneUse() &&
785
447k
      
!(170k
Op.getOpcode() == ISD::FP_EXTEND170k
&&
786
170k
        
TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())591
))
787
170k
    return 0;
788
276k
789
276k
  // Don't recurse exponentially.
790
276k
  if (Depth > 6)
791
2.99k
    return 0;
792
273k
793
273k
  switch (Op.getOpcode()) {
794
273k
  
default: return false156k
;
795
273k
  case ISD::ConstantFP: {
796
13.6k
    if (!LegalOperations)
797
10.5k
      return 1;
798
3.03k
799
3.03k
    // Don't invert constant FP values after legalization unless the target says
800
3.03k
    // the negated constant is legal.
801
3.03k
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
802
3.03k
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
803
2.12k
                            ForCodeSize);
804
3.03k
  }
805
3.03k
  case ISD::BUILD_VECTOR: {
806
2.93k
    // Only permit BUILD_VECTOR of constants.
807
10.5k
    if (
llvm::any_of(Op->op_values(), [&](SDValue N) 2.93k
{
808
10.5k
          return !N.isUndef() && 
!isa<ConstantFPSDNode>(N)10.3k
;
809
10.5k
        }))
810
1.07k
      return 0;
811
1.86k
    if (!LegalOperations)
812
1.57k
      return 1;
813
287
    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
814
287
        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
815
0
      return 1;
816
287
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
817
287
      return N.isUndef() ||
818
287
             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
819
287
                              ForCodeSize);
820
287
    });
821
287
  }
822
28.9k
  case ISD::FADD:
823
28.9k
    if (!Options->UnsafeFPMath && 
!Flags.hasNoSignedZeros()27.5k
)
824
27.2k
      return 0;
825
1.75k
826
1.75k
    // After operation legalization, it might not be legal to create new FSUBs.
827
1.75k
    if (LegalOperations && 
!TLI.isOperationLegalOrCustom(ISD::FSUB, VT)670
)
828
0
      return 0;
829
1.75k
830
1.75k
    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
831
1.75k
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
832
22
                                    Options, ForCodeSize, Depth + 1))
833
22
      return V;
834
1.73k
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
835
1.73k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
836
1.73k
                              ForCodeSize, Depth + 1);
837
4.78k
  case ISD::FSUB:
838
4.78k
    // We can't turn -(A-B) into B-A when we honor signed zeros.
839
4.78k
    if (!Options->NoSignedZerosFPMath && 
!Flags.hasNoSignedZeros()4.74k
)
840
3.94k
      return 0;
841
841
842
841
    // fold (fneg (fsub A, B)) -> (fsub B, A)
843
841
    return 1;
844
841
845
63.3k
  case ISD::FMUL:
846
63.3k
  case ISD::FDIV:
847
63.3k
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
848
63.3k
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
849
770
                                    Options, ForCodeSize, Depth + 1))
850
770
      return V;
851
62.5k
852
62.5k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
853
62.5k
                              ForCodeSize, Depth + 1);
854
62.5k
855
62.5k
  case ISD::FP_EXTEND:
856
4.16k
  case ISD::FP_ROUND:
857
4.16k
  case ISD::FSIN:
858
4.16k
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
859
4.16k
                              ForCodeSize, Depth + 1);
860
273k
  }
861
273k
}
862
863
/// If isNegatibleForFree returns true, return the newly negated expression.
864
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
865
                                    bool LegalOperations, bool ForCodeSize,
866
1.43k
                                    unsigned Depth = 0) {
867
1.43k
  // fneg is removable even if it has multiple uses.
868
1.43k
  if (Op.getOpcode() == ISD::FNEG)
869
550
    return Op.getOperand(0);
870
886
871
886
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
872
886
  const TargetOptions &Options = DAG.getTarget().Options;
873
886
  const SDNodeFlags Flags = Op->getFlags();
874
886
875
886
  switch (Op.getOpcode()) {
876
886
  
default: 0
llvm_unreachable0
("Unknown code");
877
886
  case ISD::ConstantFP: {
878
289
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
879
289
    V.changeSign();
880
289
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
881
886
  }
882
886
  case ISD::BUILD_VECTOR: {
883
90
    SmallVector<SDValue, 4> Ops;
884
454
    for (SDValue C : Op->op_values()) {
885
454
      if (C.isUndef()) {
886
8
        Ops.push_back(C);
887
8
        continue;
888
8
      }
889
446
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
890
446
      V.changeSign();
891
446
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
892
446
    }
893
90
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
894
886
  }
895
886
  case ISD::FADD:
896
20
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
897
20
898
20
    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
899
20
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
900
20
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
901
20
                           Depth + 1))
902
19
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
903
19
                         GetNegatedExpression(Op.getOperand(0), DAG,
904
19
                                              LegalOperations, ForCodeSize,
905
19
                                              Depth + 1),
906
19
                         Op.getOperand(1), Flags);
907
1
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
908
1
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
909
1
                       GetNegatedExpression(Op.getOperand(1), DAG,
910
1
                                            LegalOperations, ForCodeSize,
911
1
                                            Depth + 1),
912
1
                       Op.getOperand(0), Flags);
913
19
  case ISD::FSUB:
914
19
    // fold (fneg (fsub 0, B)) -> B
915
19
    if (ConstantFPSDNode *N0CFP =
916
7
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
917
7
      if (N0CFP->isZero())
918
6
        return Op.getOperand(1);
919
13
920
13
    // fold (fneg (fsub A, B)) -> (fsub B, A)
921
13
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
922
13
                       Op.getOperand(1), Op.getOperand(0), Flags);
923
13
924
423
  case ISD::FMUL:
925
423
  case ISD::FDIV:
926
423
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
927
423
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
928
423
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
929
423
                           Depth + 1))
930
121
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
931
121
                         GetNegatedExpression(Op.getOperand(0), DAG,
932
121
                                              LegalOperations, ForCodeSize,
933
121
                                              Depth + 1),
934
121
                         Op.getOperand(1), Flags);
935
302
936
302
    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
937
302
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
938
302
                       Op.getOperand(0),
939
302
                       GetNegatedExpression(Op.getOperand(1), DAG,
940
302
                                            LegalOperations, ForCodeSize,
941
302
                                            Depth + 1), Flags);
942
302
943
302
  case ISD::FP_EXTEND:
944
21
  case ISD::FSIN:
945
21
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
946
21
                       GetNegatedExpression(Op.getOperand(0), DAG,
947
21
                                            LegalOperations, ForCodeSize,
948
21
                                            Depth + 1));
949
24
  case ISD::FP_ROUND:
950
24
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
951
24
                       GetNegatedExpression(Op.getOperand(0), DAG,
952
24
                                            LegalOperations, ForCodeSize,
953
24
                                            Depth + 1),
954
24
                       Op.getOperand(1));
955
886
  }
956
886
}
957
958
// APInts must be the same size for most operations, this helper
959
// function zero extends the shorter of the pair so that they match.
960
// We provide an Offset so that we can create bitwidths that won't overflow.
961
10.9k
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
962
10.9k
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
963
10.9k
  LHS = LHS.zextOrSelf(Bits);
964
10.9k
  RHS = RHS.zextOrSelf(Bits);
965
10.9k
}
966
967
// Return true if this node is a setcc, or is a select_cc
968
// that selects between the target values used for true and false, making it
969
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
970
// the appropriate nodes based on the type of node we are checking. This
971
// simplifies life a bit for the callers.
972
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
973
1.49M
                                    SDValue &CC) const {
974
1.49M
  if (N.getOpcode() == ISD::SETCC) {
975
498k
    LHS = N.getOperand(0);
976
498k
    RHS = N.getOperand(1);
977
498k
    CC  = N.getOperand(2);
978
498k
    return true;
979
498k
  }
980
996k
981
996k
  if (N.getOpcode() != ISD::SELECT_CC ||
982
996k
      
!TLI.isConstTrueVal(N.getOperand(2).getNode())2.10k
||
983
996k
      
!TLI.isConstFalseVal(N.getOperand(3).getNode())263
)
984
996k
    return false;
985
259
986
259
  if (TLI.getBooleanContents(N.getValueType()) ==
987
259
      TargetLowering::UndefinedBooleanContent)
988
0
    return false;
989
259
990
259
  LHS = N.getOperand(0);
991
259
  RHS = N.getOperand(1);
992
259
  CC  = N.getOperand(4);
993
259
  return true;
994
259
}
995
996
/// Return true if this is a SetCC-equivalent operation with only one use.
997
/// If this is true, it allows the users to invert the operation for free when
998
/// it is profitable to do so.
999
5.77k
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000
5.77k
  SDValue N0, N1, N2;
1001
5.77k
  if (isSetCCEquivalent(N, N0, N1, N2) && 
N.getNode()->hasOneUse()5.70k
)
1002
5.70k
    return true;
1003
72
  return false;
1004
72
}
1005
1006
// Returns the SDNode if it is a constant float BuildVector
1007
// or constant float.
1008
325k
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
1009
325k
  if (isa<ConstantFPSDNode>(N))
1010
12.0k
    return N.getNode();
1011
313k
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
1012
17.6k
    return N.getNode();
1013
295k
  return nullptr;
1014
295k
}
1015
1016
// Determines if it is a constant integer or a build vector of constant
1017
// integers (and undefs).
1018
// Do not permit build vector implicit truncation.
1019
4.45M
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1020
4.45M
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1021
3.47M
    return !(Const->isOpaque() && 
NoOpaques3.00k
);
1022
987k
  if (N.getOpcode() != ISD::BUILD_VECTOR)
1023
912k
    return false;
1024
75.6k
  unsigned BitWidth = N.getScalarValueSizeInBits();
1025
330k
  for (const SDValue &Op : N->op_values()) {
1026
330k
    if (Op.isUndef())
1027
5.82k
      continue;
1028
324k
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1029
324k
    if (!Const || 
Const->getAPIntValue().getBitWidth() != BitWidth322k
||
1030
324k
        
(321k
Const->isOpaque()321k
&&
NoOpaques0
))
1031
2.90k
      return false;
1032
324k
  }
1033
75.6k
  
return true72.7k
;
1034
75.6k
}
1035
1036
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1037
// undef's.
1038
58.7k
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1039
58.7k
  if (V.getOpcode() != ISD::BUILD_VECTOR)
1040
51.6k
    return false;
1041
7.12k
  return isConstantOrConstantVector(V, NoOpaques) ||
1042
7.12k
         
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode())1.03k
;
1043
7.12k
}
1044
1045
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1046
                                                             const SDLoc &DL,
1047
                                                             SDValue N0,
1048
4.14M
                                                             SDValue N1) {
1049
4.14M
  // Currently this only tries to ensure we don't undo the GEP splits done by
1050
4.14M
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1051
4.14M
  // we check if the following transformation would be problematic:
1052
4.14M
  // (load/store (add, (add, x, offset1), offset2)) ->
1053
4.14M
  // (load/store (add, x, offset1+offset2)).
1054
4.14M
1055
4.14M
  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1056
3.34M
    return false;
1057
795k
1058
795k
  if (N0.hasOneUse())
1059
201k
    return false;
1060
593k
1061
593k
  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1062
593k
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1063
593k
  if (!C1 || 
!C2458k
)
1064
146k
    return false;
1065
446k
1066
446k
  const APInt &C1APIntVal = C1->getAPIntValue();
1067
446k
  const APInt &C2APIntVal = C2->getAPIntValue();
1068
446k
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1069
0
    return false;
1070
446k
1071
446k
  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1072
446k
  if (CombinedValueIntVal.getBitWidth() > 64)
1073
0
    return false;
1074
446k
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1075
446k
1076
1.39M
  for (SDNode *Node : N0->uses()) {
1077
1.39M
    auto LoadStore = dyn_cast<MemSDNode>(Node);
1078
1.39M
    if (LoadStore) {
1079
460k
      // Is x[offset2] already not a legal addressing mode? If so then
1080
460k
      // reassociating the constants breaks nothing (we test offset2 because
1081
460k
      // that's the one we hope to fold into the load or store).
1082
460k
      TargetLoweringBase::AddrMode AM;
1083
460k
      AM.HasBaseReg = true;
1084
460k
      AM.BaseOffs = C2APIntVal.getSExtValue();
1085
460k
      EVT VT = LoadStore->getMemoryVT();
1086
460k
      unsigned AS = LoadStore->getAddressSpace();
1087
460k
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1088
460k
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1089
9.75k
        continue;
1090
451k
1091
451k
      // Would x[offset1+offset2] still be a legal addressing mode?
1092
451k
      AM.BaseOffs = CombinedValue;
1093
451k
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1094
3.87k
        return true;
1095
451k
    }
1096
1.39M
  }
1097
446k
1098
446k
  
return false442k
;
1099
446k
}
1100
1101
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1104
10.8M
                                               SDValue N0, SDValue N1) {
1105
10.8M
  EVT VT = N0.getValueType();
1106
10.8M
1107
10.8M
  if (N0.getOpcode() != Opc)
1108
9.96M
    return SDValue();
1109
860k
1110
860k
  // Don't reassociate reductions.
1111
860k
  if (N0->getFlags().hasVectorReduction())
1112
393
    return SDValue();
1113
859k
1114
859k
  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1115
537k
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1116
479k
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1117
479k
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1118
479k
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1119
526
      return SDValue();
1120
526
    }
1121
57.5k
    if (N0.hasOneUse()) {
1122
41.0k
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1123
41.0k
      //              iff (op x, c1) has one use
1124
41.0k
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1125
41.0k
      if (!OpNode.getNode())
1126
0
        return SDValue();
1127
41.0k
      AddToWorklist(OpNode.getNode());
1128
41.0k
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1129
41.0k
    }
1130
57.5k
  }
1131
339k
  return SDValue();
1132
339k
}
1133
1134
// Try to reassociate commutative binops.
1135
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1136
5.67M
                                    SDValue N1, SDNodeFlags Flags) {
1137
5.67M
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1138
5.67M
  // Don't reassociate reductions.
1139
5.67M
  if (Flags.hasVectorReduction())
1140
3.59k
    return SDValue();
1141
5.66M
1142
5.66M
  // Floating-point reassociation is not allowed without loose FP math.
1143
5.66M
  if (N0.getValueType().isFloatingPoint() ||
1144
5.66M
      N1.getValueType().isFloatingPoint())
1145
0
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1146
0
      return SDValue();
1147
5.66M
1148
5.66M
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1149
506k
    return Combined;
1150
5.16M
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1151
13.5k
    return Combined;
1152
5.14M
  return SDValue();
1153
5.14M
}
1154
1155
/// Replace all \p NumTo result values of \p N with the values in \p To.
///
/// When \p AddTo is set, each non-null replacement node and its users are
/// queued on the worklist. \p N is deleted if the replacement left it
/// without uses. \returns SDValue(N, 0).
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned Idx = 0; Idx != NumTo; ++Idx)
    assert((!To[Idx].getNode() ||
            N->getValueType(Idx) == To[Idx].getValueType()) &&
           "Cannot combine value to value of different type!");

  // Keep the worklist free of nodes that get deleted during replacement.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);

  if (AddTo) {
    // Push the new nodes and any users onto the worklist.
    for (unsigned Idx = 0; Idx != NumTo; ++Idx) {
      SDNode *NewNode = To[Idx].getNode();
      if (!NewNode)
        continue;
      AddToWorklist(NewNode);
      AddUsersToWorklist(NewNode);
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1186
1187
void DAGCombiner::
1188
356k
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1189
356k
  // Replace all uses.  If any nodes become isomorphic to other nodes and
1190
356k
  // are deleted, make sure to remove them from our worklist.
1191
356k
  WorklistRemover DeadNodes(*this);
1192
356k
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1193
356k
1194
356k
  // Push the new node and any (possibly new) users onto the worklist.
1195
356k
  AddToWorklist(TLO.New.getNode());
1196
356k
  AddUsersToWorklist(TLO.New.getNode());
1197
356k
1198
356k
  // Finally, if the node is now dead, remove it from the graph.  The node
1199
356k
  // may not be dead if the replacement process recursively simplified to
1200
356k
  // something else needing this node.
1201
356k
  if (TLO.Old.getNode()->use_empty())
1202
354k
    deleteAndRecombine(TLO.Old.getNode());
1203
356k
}
1204
1205
/// Check the specified integer node value to see if it can be simplified or if
1206
/// things it uses can be simplified by bit propagation. If so, return true.
1207
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1208
5.97M
                                       const APInt &DemandedElts) {
1209
5.97M
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1210
5.97M
  KnownBits Known;
1211
5.97M
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1212
5.62M
    return false;
1213
346k
1214
346k
  // Revisit the node.
1215
346k
  AddToWorklist(Op.getNode());
1216
346k
1217
346k
  // Replace the old value with the new one.
1218
346k
  ++NodesCombined;
1219
346k
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1220
346k
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1221
346k
             dbgs() << '\n');
1222
346k
1223
346k
  CommitTargetLoweringOpt(TLO);
1224
346k
  return true;
1225
346k
}
1226
1227
/// Check the specified vector node value to see if it can be simplified or
1228
/// if things it uses can be simplified as it only uses some of the elements.
1229
/// If so, return true.
1230
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1231
                                             const APInt &DemandedElts,
1232
773k
                                             bool AssumeSingleUse) {
1233
773k
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1234
773k
  APInt KnownUndef, KnownZero;
1235
773k
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1236
773k
                                      TLO, 0, AssumeSingleUse))
1237
767k
    return false;
1238
6.53k
1239
6.53k
  // Revisit the node.
1240
6.53k
  AddToWorklist(Op.getNode());
1241
6.53k
1242
6.53k
  // Replace the old value with the new one.
1243
6.53k
  ++NodesCombined;
1244
6.53k
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1245
6.53k
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1246
6.53k
             dbgs() << '\n');
1247
6.53k
1248
6.53k
  CommitTargetLoweringOpt(TLO);
1249
6.53k
  return true;
1250
6.53k
}
1251
1252
349
/// Redirect the users of \p Load to \p ExtLoad: result 0 is replaced by a
/// truncate of ExtLoad's result 0 back to the original type, result 1 by
/// ExtLoad's result 1. \p Load is then deleted and the truncate queued.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);

  const SDValue WideValue(ExtLoad, 0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, WideValue);

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');

  // Keep the worklist free of nodes deleted by the replacements below.
  WorklistRemover DeadNodes(*this);

  const SDValue OldValue(Load, 0);
  DAG.ReplaceAllUsesOfValueWith(OldValue, Trunc);

  const SDValue OldResult1(Load, 1);
  const SDValue NewResult1(ExtLoad, 1);
  DAG.ReplaceAllUsesOfValueWith(OldResult1, NewResult1);

  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1265
1266
6.38k
/// Produce a version of \p Op widened to the promoted type \p PVT.
///
/// \p Replace is set to true only when \p Op is an unindexed load that was
/// re-created as an extending load; it is false in every other case.
/// \returns an empty SDValue if the fallback ANY_EXTEND is not legal for
/// \p PVT.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);

  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *Load = cast<LoadSDNode>(Op);
    EVT MemVT = Load->getMemoryVT();
    // A non-extending load becomes an any-extending one; an extending load
    // keeps its extension kind.
    ISD::LoadExtType ExtType = Load->getExtensionType();
    if (ISD::isNON_EXTLoad(Load))
      ExtType = ISD::EXTLOAD;
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          Load->getChain(), Load->getBasePtr(),
                          MemVT, Load->getMemOperand());
  }

  const unsigned Opc = Op.getOpcode();
  if (Opc == ISD::AssertSext) {
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
  } else if (Opc == ISD::AssertZext) {
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
  } else if (Opc == ISD::Constant) {
    // Constants of byte-sized types are sign extended, others zero extended.
    unsigned ExtOpc = ISD::ZERO_EXTEND;
    if (Op.getValueType().isByteSized())
      ExtOpc = ISD::SIGN_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }

  // Everything else, including Assert* operands that could not be promoted,
  // falls back to an any-extend when that is legal.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1302
1303
37
/// Promote \p Op to \p PVT and wrap the result in a SIGN_EXTEND_INREG from
/// the original type. \returns an empty SDValue if SIGN_EXTEND_INREG is not
/// legal for \p PVT or the operand could not be promoted.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();

  // Read these before Op's node may be deleted by
  // ReplaceLoadWithPromotedLoad below.
  EVT OrigVT = Op.getValueType();
  SDLoc DL(Op);

  bool ReplaceLoad = false;
  SDValue Promoted = PromoteOperand(Op, PVT, ReplaceLoad);
  SDNode *PromotedNode = Promoted.getNode();
  if (!PromotedNode)
    return SDValue();
  AddToWorklist(PromotedNode);

  if (ReplaceLoad)
    ReplaceLoadWithPromotedLoad(Op.getNode(), PromotedNode);

  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Promoted.getValueType(),
                     Promoted, DAG.getValueType(OrigVT));
}
1319
1320
1.02k
/// Promote \p Op to \p PVT and zero-extend the result in-register from the
/// original type. \returns an empty SDValue if the operand could not be
/// promoted.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  // Read these before Op's node may be deleted by
  // ReplaceLoadWithPromotedLoad below.
  EVT OrigVT = Op.getValueType();
  SDLoc DL(Op);

  bool ReplaceLoad = false;
  SDValue Promoted = PromoteOperand(Op, PVT, ReplaceLoad);
  SDNode *PromotedNode = Promoted.getNode();
  if (!PromotedNode)
    return SDValue();
  AddToWorklist(PromotedNode);

  if (ReplaceLoad)
    ReplaceLoadWithPromotedLoad(Op.getNode(), PromotedNode);

  return DAG.getZeroExtendInReg(Promoted, DL, OrigVT);
}
1333
1334
/// Promote the specified integer binary operation if the target indicates it is
1335
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1336
/// i32 since i16 instructions are longer.
1337
4.52M
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1338
4.52M
  if (!LegalOperations)
1339
2.55M
    return SDValue();
1340
1.97M
1341
1.97M
  EVT VT = Op.getValueType();
1342
1.97M
  if (VT.isVector() || 
!VT.isInteger()1.74M
)
1343
223k
    return SDValue();
1344
1.74M
1345
1.74M
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1346
1.74M
  // promoting it.
1347
1.74M
  unsigned Opc = Op.getOpcode();
1348
1.74M
  if (TLI.isTypeDesirableForOp(Opc, VT))
1349
1.74M
    return SDValue();
1350
3.41k
1351
3.41k
  EVT PVT = VT;
1352
3.41k
  // Consult target whether it is a good idea to promote this operation and
1353
3.41k
  // what's the right type to promote it to.
1354
3.41k
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1355
2.42k
    assert(PVT != VT && "Don't know what type to promote to!");
1356
2.42k
1357
2.42k
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1358
2.42k
1359
2.42k
    bool Replace0 = false;
1360
2.42k
    SDValue N0 = Op.getOperand(0);
1361
2.42k
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1362
2.42k
1363
2.42k
    bool Replace1 = false;
1364
2.42k
    SDValue N1 = Op.getOperand(1);
1365
2.42k
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1366
2.42k
    SDLoc DL(Op);
1367
2.42k
1368
2.42k
    SDValue RV =
1369
2.42k
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1370
2.42k
1371
2.42k
    // We are always replacing N0/N1's use in N and only need
1372
2.42k
    // additional replacements if there are additional uses.
1373
2.42k
    Replace0 &= !N0->hasOneUse();
1374
2.42k
    Replace1 &= (N0 != N1) && 
!N1->hasOneUse()2.42k
;
1375
2.42k
1376
2.42k
    // Combine Op here so it is preserved past replacements.
1377
2.42k
    CombineTo(Op.getNode(), RV);
1378
2.42k
1379
2.42k
    // If operands have a use ordering, make sure we deal with
1380
2.42k
    // predecessor first.
1381
2.42k
    if (Replace0 && 
Replace1107
&&
N0.getNode()->isPredecessorOf(N1.getNode())2
) {
1382
0
      std::swap(N0, N1);
1383
0
      std::swap(NN0, NN1);
1384
0
    }
1385
2.42k
1386
2.42k
    if (Replace0) {
1387
107
      AddToWorklist(NN0.getNode());
1388
107
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1389
107
    }
1390
2.42k
    if (Replace1) {
1391
89
      AddToWorklist(NN1.getNode());
1392
89
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1393
89
    }
1394
2.42k
    return Op;
1395
2.42k
  }
1396
983
  return SDValue();
1397
983
}
1398
1399
/// Promote the specified integer shift operation if the target indicates it is
1400
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1401
/// i32 since i16 instructions are longer.
1402
591k
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1403
591k
  if (!LegalOperations)
1404
313k
    return SDValue();
1405
277k
1406
277k
  EVT VT = Op.getValueType();
1407
277k
  if (VT.isVector() || 
!VT.isInteger()271k
)
1408
5.89k
    return SDValue();
1409
271k
1410
271k
  // If operation type is 'undesirable', e.g. i16 on x86, consider
1411
271k
  // promoting it.
1412
271k
  unsigned Opc = Op.getOpcode();
1413
271k
  if (TLI.isTypeDesirableForOp(Opc, VT))
1414
264k
    return SDValue();
1415
6.65k
1416
6.65k
  EVT PVT = VT;
1417
6.65k
  // Consult target whether it is a good idea to promote this operation and
1418
6.65k
  // what's the right type to promote it to.
1419
6.65k
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1420
1.50k
    assert(PVT != VT && "Don't know what type to promote to!");
1421
1.50k
1422
1.50k
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1423
1.50k
1424
1.50k
    bool Replace = false;
1425
1.50k
    SDValue N0 = Op.getOperand(0);
1426
1.50k
    SDValue N1 = Op.getOperand(1);
1427
1.50k
    if (Opc == ISD::SRA)
1428
37
      N0 = SExtPromoteOperand(N0, PVT);
1429
1.46k
    else if (Opc == ISD::SRL)
1430
987
      N0 = ZExtPromoteOperand(N0, PVT);
1431
476
    else
1432
476
      N0 = PromoteOperand(N0, PVT, Replace);
1433
1.50k
1434
1.50k
    if (!N0.getNode())
1435
0
      return SDValue();
1436
1.50k
1437
1.50k
    SDLoc DL(Op);
1438
1.50k
    SDValue RV =
1439
1.50k
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1440
1.50k
1441
1.50k
    AddToWorklist(N0.getNode());
1442
1.50k
    if (Replace)
1443
55
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1444
1.50k
1445
1.50k
    // Deal with Op being deleted.
1446
1.50k
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1447
1.50k
      return RV;
1448
5.15k
  }
1449
5.15k
  return SDValue();
1450
5.15k
}
1451
1452
312k
/// Re-create an extension node when the target considers its result type
/// undesirable and asks for the operation to be promoted.
///
/// \param Op the extension node to consider.
/// \returns a node with the same opcode, result type and first operand as
/// \p Op, or a null SDValue when no promotion happens.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Restrict ourselves to scalar integer results.
  EVT VT = Op.getValueType();
  const bool IsScalarInteger = !VT.isVector() && VT.isInteger();
  if (!IsScalarInteger)
    return SDValue();

  // A type the target already likes (unlike e.g. i16 on x86) needs no
  // promotion.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  // Ask the target whether promotion is worthwhile; it reports the type to
  // promote to through PVT.
  EVT PVT = VT;
  if (!TLI.IsDesirableToPromoteOp(Op, PVT))
    return SDValue();

  assert(PVT != VT && "Don't know what type to promote to!");
  // Rebuilding the node gives getNode a chance to apply these folds:
  //   (aext (aext x)) -> (aext x)
  //   (aext (zext x)) -> (zext x)
  //   (aext (sext x)) -> (sext x)
  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
  SDValue Source = Op.getOperand(0);
  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Source);
}
1479
1480
2.69M
/// Replace an unindexed scalar integer load of an undesirable type with an
/// extending load of a promoted type followed by a TRUNCATE.
///
/// \param Op the load to consider.
/// \returns true if the load was replaced (the original node is then handed
/// to deleteAndRecombine), false if nothing changed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integer loads are promoted.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // Leave the load alone when the target is fine with its type (the usual
  // counter-example being i16 on x86).
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  // The target decides whether to promote and fills in the promoted type.
  EVT PVT = VT;
  if (!TLI.IsDesirableToPromoteOp(Op, PVT))
    return false;

  assert(PVT != VT && "Don't know what type to promote to!");

  SDLoc DL(Op);
  SDNode *N = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT MemVT = LD->getMemoryVT();

  // A non-extending load becomes an any-extending one; an already extending
  // load keeps its extension kind.
  ISD::LoadExtType ExtType;
  if (ISD::isNON_EXTLoad(LD))
    ExtType = ISD::EXTLOAD;
  else
    ExtType = LD->getExtensionType();

  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(),
                                 LD->getBasePtr(), MemVT, LD->getMemOperand());
  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect the loaded value to the truncate and the chain result to the
  // new load's chain result.
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
  deleteAndRecombine(N);
  AddToWorklist(Result.getNode());
  return true;
}
1525
1526
/// Recursively delete a node which has no uses and any operands for
1527
/// which it is the only use.
1528
///
1529
/// Note that this both deletes the nodes and removes them from the worklist.
1530
/// It also adds any nodes who have had a user deleted to the worklist as they
1531
/// may now have only one use and subject to other combines.
1532
74.2M
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1533
74.2M
  if (!N->use_empty())
1534
69.5M
    return false;
1535
4.69M
1536
4.69M
  SmallSetVector<SDNode *, 16> Nodes;
1537
4.69M
  Nodes.insert(N);
1538
17.1M
  do {
1539
17.1M
    N = Nodes.pop_back_val();
1540
17.1M
    if (!N)
1541
0
      continue;
1542
17.1M
1543
17.1M
    if (N->use_empty()) {
1544
8.27M
      for (const SDValue &ChildN : N->op_values())
1545
13.1M
        Nodes.insert(ChildN.getNode());
1546
8.27M
1547
8.27M
      removeFromWorklist(N);
1548
8.27M
      DAG.DeleteNode(N);
1549
8.89M
    } else {
1550
8.89M
      AddToWorklist(N);
1551
8.89M
    }
1552
17.1M
  } while (!Nodes.empty());
1553
4.69M
  return true;
1554
4.69M
}
1555
1556
//===----------------------------------------------------------------------===//
1557
//  Main DAG Combiner implementation
1558
//===----------------------------------------------------------------------===//
1559
1560
2.88M
/// Drive the combiner over the whole DAG for the given combine level.
///
/// Every node is put on the worklist and combined until the worklist runs
/// dry; replacements are applied to the DAG and dead nodes are removed.
///
/// \param AtLevel the combine phase being run; it determines the
/// LegalOperations / LegalTypes flags read by the visit routines.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Publish the phase through the instance members used by the visitors.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Seed the worklist with every node of the DAG.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // A dummy node (not part of allnodes) holds a reference to the root: this
  // keeps the root from being deleted and tracks any change of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // Process worklist entries until none is left.
  while (SDNode *N = getNextWorklistEntry()) {
    // A node without uses is dead. Deleting it also revisits its operands,
    // which may now be dead themselves or have fewer uses, enabling other
    // transformations.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // When running after DAG legalization, nodes taken off the worklist are
    // re-legalized before being combined.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> Relegalized;
      bool StillValid = DAG.LegalizeOp(N, Relegalized);

      for (SDNode *Updated : Relegalized) {
        AddToWorklist(Updated);
        AddUsersToWorklist(Updated);
      }
      if (!StillValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Queue the operands that have not been combined yet. The worklist
    // uniques its entries, so an operand is not processed over and over.
    CombinedNodes.insert(N);
    for (const SDValue &Operand : N->op_values())
      if (!CombinedNodes.count(Operand.getNode()))
        AddToWorklist(Operand.getNode());

    SDValue RV = combine(N);

    // Null result: nothing was combined.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // Getting N itself back (instead of a new node or null) means the node
    // defines multiple values and CombineTo was used, which already took care
    // of the worklist bookkeeping.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() != RV.getNode()->getNumValues()) {
      // Differing result counts: N must have a single result whose type
      // matches RV; replace that one value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    } else {
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    }

    // The replacement and its users deserve another look.
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Remove N if it is dead by now. It may still be alive if the replacement
    // process recursively simplified to something that needs it. This also
    // queues the operands that lost a user.
    recursivelyDeleteUnusedNodes(N);
  }

  // Pick up a changed root (e.g. the old root was a dead load) and clean up.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1654
1655
69.5M
/// Dispatch \p N to the visit routine that handles its opcode.
///
/// \returns whatever the selected routine returns, or a null SDValue for
/// opcodes that have no dedicated routine.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  // Chains and value merging.
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);

  // Integer addition / subtraction, including saturating, overflow and carry
  // variants.
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT: return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT: return visitSUBSAT(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO: return visitADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO: return visitSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);

  // Integer multiplication, division, remainder and min/max.
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO: return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);

  // Bitwise logic, shifts and rotates.
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR: return visitFunnelShift(N);

  // Unary integer / bit operations.
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);

  // Selects and comparisons.
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);

  // Extensions, truncation and bit reinterpretation.
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);

  // Floating-point arithmetic and conversions.
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FMINIMUM: return visitFMINIMUM(N);
  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);

  // Branches and scalar memory accesses.
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);

  // Vector construction, element access and masked memory operations.
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);

  // Miscellaneous.
  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);

  // Vector reductions all share one routine.
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);

  // No dedicated routine for this opcode.
  default:
    break;
  }
  return SDValue();
}
1785
1786
69.5M
/// Try to simplify \p N, going through progressively more generic strategies:
/// the opcode-specific visit routine, the target's DAG combine hook, operation
/// promotion, and finally CSE with an already existing commuted node.
///
/// \returns the first non-null result produced by one of these steps, or a
/// null SDValue if none of them applies.
SDValue DAGCombiner::combine(SDNode *N) {
  // Step 1: the generic, opcode-specific combine.
  SDValue RV = visit(N);
  if (RV.getNode())
    return RV;

  assert(N->getOpcode() != ISD::DELETED_NODE &&
         "Node was deleted but visit returned NULL!");

  // Step 2: the target-specific combine, for target opcodes and for generic
  // opcodes the target registered interest in.
  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
      TLI.hasTargetDAGCombine(static_cast<ISD::NodeType>(N->getOpcode()))) {
    // Expose the DAG combiner to the target combiner implementations.
    TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this);

    RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    if (RV.getNode())
      return RV;
  }

  // Step 3: try to promote the operation.
  switch (N->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    RV = PromoteIntBinOp(SDValue(N, 0));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    RV = PromoteIntShiftOp(SDValue(N, 0));
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    RV = PromoteExtend(SDValue(N, 0));
    break;
  case ISD::LOAD:
    // PromoteLoad performs the replacement itself; hand back N to signal
    // that something changed.
    if (PromoteLoad(SDValue(N, 0)))
      RV = SDValue(N, 0);
    break;
  default:
    break;
  }
  // N must not be inspected any further once a result is available.
  if (RV.getNode())
    return RV;

  // Step 4: for a commutative binary node with a single result, reuse the
  // commuted form if it already exists in the DAG.
  if (TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // Constant operands are canonicalized to the RHS, so the swapped form is
    // only looked up when it would not move a constant to the LHS.
    const bool SwapIsCanonical =
        isa<ConstantSDNode>(LHS) || !isa<ConstantSDNode>(RHS);
    if (LHS != RHS && SwapIsCanonical) {
      SDValue Ops[] = {RHS, LHS};
      if (SDNode *CSENode = DAG.getNodeIfExists(
              N->getOpcode(), N->getVTList(), Ops, N->getFlags()))
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1853
1854
/// Given a node, return its input chain if it has one, otherwise return a null
1855
/// sd operand.
1856
2.12M
static SDValue getInputChainForNode(SDNode *N) {
1857
2.12M
  if (unsigned NumOps = N->getNumOperands()) {
1858
2.10M
    if (N->getOperand(0).getValueType() == MVT::Other)
1859
2.04M
      return N->getOperand(0);
1860
60.0k
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1861
60.0k
      return N->getOperand(NumOps-1);
1862
0
    for (unsigned i = 1; i < NumOps-1; ++i)
1863
0
      if (N->getOperand(i).getValueType() == MVT::Other)
1864
0
        return N->getOperand(i);
1865
0
  }
1866
2.12M
  
return SDValue()25.6k
;
1867
2.12M
}
1868
1869
1.87M
/// Simplify a TokenFactor node.
///
/// Three things are attempted: dropping a two-operand token factor where one
/// operand is already chained to the other, inlining single-use operand token
/// factors (and dropping entry tokens / duplicate operands), and pruning
/// operands that are reachable through the chain of another operand.
///
/// \returns the replacement value, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // With exactly two operands, an operand whose input chain is the other
  // operand makes that other operand redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // When the only user is itself a token factor, make sure it gets a chance
  // to merge with this one, so that token factor chains do not inhibit
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;      // Token factors to visit.
  SmallVector<SDValue, 8> Ops;       // Operands of the replacement.
  SmallPtrSet<SDNode *, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;              // Whether N has to be replaced.

  // Begin with this token factor; TFs grows as nested ones are discovered.
  TFs.push_back(N);

  for (unsigned TFIdx = 0; TFIdx < TFs.size(); ++TFIdx) {
    // Limit the number of inlined nodes to avoid quadratic compile times. The
    // token factors not processed yet must be added to Ops as they are,
    // otherwise their operands would be dropped from the result.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned Rest = TFIdx; Rest < TFs.size(); Rest++)
        Ops.emplace_back(TFs[Rest], 0);
      // Forget the unprocessed token factors so that they are not added to
      // the combiner worklist below.
      TFs.resize(TFIdx);
      break;
    }

    SDNode *TF = TFs[TFIdx];
    for (const SDValue &Op : TF->op_values()) {
      const unsigned OpOpc = Op.getOpcode();

      // Entry tokens are redundant and are simply left out.
      if (OpOpc == ISD::EntryToken) {
        Changed = true;
        continue;
      }

      // A single-use token factor that is not queued yet gets inlined.
      if (OpOpc == ISD::TokenFactor && Op.hasOneUse() &&
          !is_contained(TFs, Op.getNode())) {
        TFs.push_back(Op.getNode());
        Changed = true;
        continue;
      }

      // Everything else is kept as an operand, once.
      if (SeenOps.insert(Op.getNode()).second)
        Ops.push_back(Op);
      else
        Changed = true;
    }
  }

  // Re-visit the inlined token factors so they get cleaned up in case they
  // have been removed. Index 0 is the current node and is skipped.
  for (unsigned TFIdx = 1, NumTFs = TFs.size(); TFIdx < NumTFs; TFIdx++)
    AddToWorklist(TFs[TFIdx]);

  // Remove operands that are chained to another operand in the list, by
  // walking up the chains breadth-first and stopping when another operand is
  // reached. In general the walk has to climb to the entry node, but it can
  // stop early once all remaining work belongs to a single operand, since no
  // further pruning is possible then.

  // Nodes to search through, paired with the index of the operand in Ops the
  // search originated from.
  SmallVector<std::pair<SDNode *, unsigned>, 8> ChainWorklist;
  SmallVector<unsigned, 8> OpWorkCount; // Outstanding work per operand.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    ChainWorklist.emplace_back(Op.getNode(), NumLeftToConsider++);
    OpWorkCount.push_back(1);
  }

  auto EnqueueChain = [&](unsigned CurIdx, SDNode *Chain, unsigned OpNumber) {
    // Reaching one of the operands means that operand can be removed from the
    // list; the search started from it is re-attributed to OpNumber.
    if (SeenOps.count(Chain) != 0) {
      Changed = true;
      DidPruneOps = true;

      unsigned OrigOpNumber = 0;
      for (; OrigOpNumber < Ops.size(); ++OrigOpNumber)
        if (Ops[OrigOpNumber].getNode() == Chain)
          break;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");

      // Re-mark the pending entries of OrigOpNumber as OpNumber.
      for (unsigned Idx = CurIdx + 1; Idx < ChainWorklist.size(); ++Idx)
        if (ChainWorklist[Idx].second == OrigOpNumber)
          ChainWorklist[Idx].second = OpNumber;

      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }

    // Queue the chain if it has not been seen before.
    if (SeenChains.insert(Chain).second) {
      OpWorkCount[OpNumber]++;
      ChainWorklist.emplace_back(Chain, OpNumber);
    }
  };

  for (unsigned Idx = 0; Idx < ChainWorklist.size() && Idx < 1024; ++Idx) {
    // At least two operands have to be under consideration for pruning.
    if (NumLeftToConsider <= 1)
      break;

    SDNode *CurNode = ChainWorklist[Idx].first;
    unsigned CurOpNumber = ChainWorklist[Idx].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");

    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting the entry token is the only way for a search to end without
      // running into another operand's search. Compensate for the decrement
      // below so that this operand is not marked as considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        EnqueueChain(Idx, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // For these nodes the walk continues through operand 0.
      EnqueueChain(Idx, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        EnqueueChain(Idx, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }

    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // Nothing changed: leave the token factor alone.
  if (!Changed)
    return SDValue();

  // With no operand left, the entry token is the only possible outcome.
  if (Ops.empty())
    return DAG.getEntryNode();

  if (!DidPruneOps)
    return DAG.getTokenFactor(SDLoc(N), Ops);

  // Keep only the operands that were not reached through another chain.
  SmallVector<SDValue, 8> PrunedOps;
  for (const SDValue &Op : Ops)
    if (SeenChains.count(Op.getNode()) == 0)
      PrunedOps.push_back(Op);
  return DAG.getTokenFactor(SDLoc(N), PrunedOps);
}
2052
2053
/// MERGE_VALUES can always be eliminated.
2054
65.3k
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2055
65.3k
  WorklistRemover DeadNodes(*this);
2056
65.3k
  // Replacing results may cause a different MERGE_VALUES to suddenly
2057
65.3k
  // be CSE'd with N, and carry its uses with it. Iterate until no
2058
65.3k
  // uses remain, to ensure that the node can be safely deleted.
2059
65.3k
  // First add the users of this node to the work list so that they
2060
65.3k
  // can be tried again once they have new operands.
2061
65.3k
  AddUsersToWorklist(N);
2062
65.3k
  do {
2063
65.3k
    // Do as a single replacement to avoid rewalking use lists.
2064
65.3k
    SmallVector<SDValue, 8> Ops;
2065
217k
    for (unsigned i = 0, e = N->getNumOperands(); i != e; 
++i151k
)
2066
151k
      Ops.push_back(N->getOperand(i));
2067
65.3k
    DAG.ReplaceAllUsesWith(N, Ops.data());
2068
65.3k
  } while (!N->use_empty());
2069
65.3k
  deleteAndRecombine(N);
2070
65.3k
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2071
65.3k
}
2072
2073
/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2074
/// ConstantSDNode pointer else nullptr.
2075
2.72M
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2076
2.72M
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2077
2.72M
  return Const != nullptr && 
!Const->isOpaque()353k
?
Const351k
:
nullptr2.37M
;
2078
2.72M
}
2079
2080
6.75M
/// Try to push a binary operator into a one-use select of constants:
///   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
/// Returns the new select, or an empty SDValue if the pattern does not match.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // An integer or FP constant (scalar or build vector); opaque integer
  // constants are rejected.
  auto IsFoldableConstant = [](SDValue V) -> bool {
    return isConstantOrConstantVector(V, true) ||
           isConstantFPBuildVectorOrConstantFP(V);
  };

  // Only proceed when the old select goes away: the aim is to eliminate the
  // binop, not to trade it for an extra select.
  // TODO: Handle ISD::SELECT_CC.
  auto IsOneUseSelect = [](SDValue V) {
    return V.getOpcode() == ISD::SELECT && V.hasOneUse();
  };

  // Prefer operand 0 as the select; otherwise try operand 1.
  unsigned SelOpNo = IsOneUseSelect(BO->getOperand(0)) ? 0 : 1;
  SDValue Sel = BO->getOperand(SelOpNo);
  if (!IsOneUseSelect(Sel))
    return SDValue();

  // Both arms of the select have to be constants.
  SDValue CT = Sel.getOperand(1);
  if (!IsFoldableConstant(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!IsFoldableConstant(CF))
    return SDValue();

  // Opaque constants cannot be constant folded, so normally the other binop
  // operand has to be a foldable constant as well. The exception is "and" and
  // "or" with select arms that are each 0 or -1, where a non-constant operand
  // can be propagated into the select:
  //   and (select Cond, 0, -1), X --> select Cond, 0, X
  //   or X, (select Cond, -1, 0) --> select Cond, -1, X
  unsigned BinOpcode = BO->getOpcode();
  bool CanFoldNonConst = false;
  if (BinOpcode == ISD::AND || BinOpcode == ISD::OR)
    CanFoldNonConst =
        (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
        (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst && !IsFoldableConstant(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // A shift's value and amount operands may have different types (on x86, for
  // instance, the shift amount is i8 regardless of the LHS type). Give up when
  // the select is the second operand and the types disagree.
  // TODO: It would be possible to detect shifts, fix up the types and still
  // perform the optimization if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  SDLoc DL(Sel);

  // Applies the binop to one select arm, keeping the original operand order.
  auto ApplyToArm = [&](SDValue Arm) {
    return SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, Arm)
                   : DAG.getNode(BinOpcode, DL, VT, Arm, CBO);
  };

  // Unless the non-constant case is allowed, each new arm must have folded to
  // undef or to a constant.
  auto ArmIsAcceptable = [&](SDValue NewArm) -> bool {
    return CanFoldNonConst || NewArm.isUndef() || IsFoldableConstant(NewArm);
  };

  SDValue NewCT = ApplyToArm(CT);
  if (!ArmIsAcceptable(NewCT))
    return SDValue();

  SDValue NewCF = ApplyToArm(CF);
  if (!ArmIsAcceptable(NewCF))
    return SDValue();

  // Build the replacement select and carry over the binop's flags.
  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}
2157
2158
3.68M
/// Fold an add/sub of a constant with a zero-extended "low bit is clear" test
/// into a sub/add of the low bit itself with an adjusted constant:
///   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
///   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
/// Returns an empty SDValue when the pattern does not match.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // The constant is the RHS of an add (add Z, C) but the LHS of a sub
  // (sub C, Z); the zext is the other operand.
  const bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = N->getOperand(IsAdd ? 1 : 0);
  SDValue Z = N->getOperand(IsAdd ? 0 : 1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // The zext source has to be an i1 setcc.
  SDValue SetCC = Z.getOperand(0);
  if (SetCC.getOpcode() != ISD::SETCC || SetCC.getValueType() != MVT::i1)
    return SDValue();

  // The compare has to be exactly: setcc (X & 1), 0, eq.
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ)
    return SDValue();
  if (!isNullConstant(SetCC.getOperand(1)))
    return SDValue();
  SDValue Masked = SetCC.getOperand(0);
  if (Masked.getOpcode() != ISD::AND || !isOneConstant(Masked.getOperand(1)))
    return SDValue();

  // Use the low bit directly, flip add <-> sub and bump the constant by one
  // in the matching direction.
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(Masked, DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
                     : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  unsigned NewOpcode = IsAdd ? ISD::SUB : ISD::ADD;
  return DAG.getNode(NewOpcode, DL, VT, C1, LowBit);
}
2196
2197
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2198
/// a shift and add with a different constant.
2199
3.68M
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2200
3.68M
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2201
3.68M
         "Expecting add or sub");
2202
3.68M
2203
3.68M
  // We need a constant operand for the add/sub, and the other operand is a
2204
3.68M
  // logical shift right: add (srl), C or sub C, (srl).
2205
3.68M
  // TODO - support non-uniform vector amounts.
2206
3.68M
  bool IsAdd = N->getOpcode() == ISD::ADD;
2207
3.68M
  SDValue ConstantOp = IsAdd ? 
N->getOperand(1)3.52M
:
N->getOperand(0)159k
;
2208
3.68M
  SDValue ShiftOp = IsAdd ? 
N->getOperand(0)3.52M
:
N->getOperand(1)159k
;
2209
3.68M
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2210
3.68M
  if (!C || 
ShiftOp.getOpcode() != ISD::SRL2.93M
)
2211
3.68M
    return SDValue();
2212
5.91k
2213
5.91k
  // The shift must be of a 'not' value.
2214
5.91k
  SDValue Not = ShiftOp.getOperand(0);
2215
5.91k
  if (!Not.hasOneUse() || 
!isBitwiseNot(Not)3.09k
)
2216
5.89k
    return SDValue();
2217
20
2218
20
  // The shift must be moving the sign bit to the least-significant-bit.
2219
20
  EVT VT = ShiftOp.getValueType();
2220
20
  SDValue ShAmt = ShiftOp.getOperand(1);
2221
20
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2222
20
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2223
0
    return SDValue();
2224
20
2225
20
  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2226
20
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2227
20
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2228
20
  SDLoc DL(N);
2229
20
  auto ShOpcode = IsAdd ? 
ISD::SRA10
:
ISD::SRL10
;
2230
20
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2231
20
  APInt NewC = IsAdd ? 
C->getAPIntValue() + 110
:
C->getAPIntValue() - 110
;
2232
20
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2233
20
}
2234
2235
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2236
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2237
/// are no common bits set in the operands).
2238
4.16M
SDValue DAGCombiner::visitADDLike(SDNode *N) {
2239
4.16M
  SDValue N0 = N->getOperand(0);
2240
4.16M
  SDValue N1 = N->getOperand(1);
2241
4.16M
  EVT VT = N0.getValueType();
2242
4.16M
  SDLoc DL(N);
2243
4.16M
2244
4.16M
  // fold vector ops
2245
4.16M
  if (VT.isVector()) {
2246
149k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
2247
48
      return FoldedVOp;
2248
149k
2249
149k
    // fold (add x, 0) -> x, vector edition
2250
149k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
2251
309
      return N0;
2252
149k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
2253
238
      return N1;
2254
4.16M
  }
2255
4.16M
2256
4.16M
  // fold (add x, undef) -> undef
2257
4.16M
  if (N0.isUndef())
2258
3
    return N0;
2259
4.16M
2260
4.16M
  if (N1.isUndef())
2261
158
    return N1;
2262
4.16M
2263
4.16M
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2264
4.00k
    // canonicalize constant to RHS
2265
4.00k
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2266
1.01k
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2267
2.99k
    // fold (add c1, c2) -> c1+c2
2268
2.99k
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2269
2.99k
                                      N1.getNode());
2270
2.99k
  }
2271
4.15M
2272
4.15M
  // fold (add x, 0) -> x
2273
4.15M
  if (isNullConstant(N1))
2274
546
    return N0;
2275
4.15M
2276
4.15M
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2277
3.41M
    // fold ((A-c1)+c2) -> (A+(c2-c1))
2278
3.41M
    if (N0.getOpcode() == ISD::SUB &&
2279
3.41M
        
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)14.9k
) {
2280
7.43k
      SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2281
7.43k
                                               N0.getOperand(1).getNode());
2282
7.43k
      assert(Sub && "Constant folding failed");
2283
7.43k
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2284
7.43k
    }
2285
3.40M
2286
3.40M
    // fold ((c1-A)+c2) -> (c1+c2)-A
2287
3.40M
    if (N0.getOpcode() == ISD::SUB &&
2288
3.40M
        
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)7.56k
) {
2289
287
      SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2290
287
                                               N0.getOperand(0).getNode());
2291
287
      assert(Add && "Constant folding failed");
2292
287
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2293
287
    }
2294
3.40M
2295
3.40M
    // add (sext i1 X), 1 -> zext (not i1 X)
2296
3.40M
    // We don't transform this pattern:
2297
3.40M
    //   add (zext i1 X), -1 -> sext (not i1 X)
2298
3.40M
    // because most (?) targets generate better code for the zext form.
2299
3.40M
    if (N0.getOpcode() == ISD::SIGN_EXTEND && 
N0.hasOneUse()6.14k
&&
2300
3.40M
        
isOneOrOneSplat(N1)1.10k
) {
2301
252
      SDValue X = N0.getOperand(0);
2302
252
      if ((!LegalOperations ||
2303
252
           
(46
TLI.isOperationLegal(ISD::XOR, X.getValueType())46
&&
2304
46
            
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)18
)) &&
2305
252
          
X.getScalarValueSizeInBits() == 1224
) {
2306
17
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2307
17
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2308
17
      }
2309
3.40M
    }
2310
3.40M
2311
3.40M
    // Undo the add -> or combine to merge constant offsets from a frame index.
2312
3.40M
    if (N0.getOpcode() == ISD::OR &&
2313
3.40M
        
isa<FrameIndexSDNode>(N0.getOperand(0))6.65k
&&
2314
3.40M
        
isa<ConstantSDNode>(N0.getOperand(1))4.78k
&&
2315
3.40M
        
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))4.77k
) {
2316
4.77k
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2317
4.77k
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2318
4.77k
    }
2319
4.14M
  }
2320
4.14M
2321
4.14M
  if (SDValue NewSel = foldBinOpIntoSelect(N))
2322
8
    return NewSel;
2323
4.14M
2324
4.14M
  // reassociate add
2325
4.14M
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2326
4.13M
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2327
516k
      return RADD;
2328
3.62M
  }
2329
3.62M
  // fold ((0-A) + B) -> B-A
2330
3.62M
  if (N0.getOpcode() == ISD::SUB && 
isNullOrNullSplat(N0.getOperand(0))13.1k
)
2331
198
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2332
3.62M
2333
3.62M
  // fold (A + (0-B)) -> A-B
2334
3.62M
  if (N1.getOpcode() == ISD::SUB && 
isNullOrNullSplat(N1.getOperand(0))6.38k
)
2335
1.84k
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2336
3.62M
2337
3.62M
  // fold (A+(B-A)) -> B
2338
3.62M
  if (N1.getOpcode() == ISD::SUB && 
N0 == N1.getOperand(1)4.53k
)
2339
17
    return N1.getOperand(0);
2340
3.62M
2341
3.62M
  // fold ((B-A)+A) -> B
2342
3.62M
  if (N0.getOpcode() == ISD::SUB && 
N1 == N0.getOperand(1)12.9k
)
2343
11
    return N0.getOperand(0);
2344
3.62M
2345
3.62M
  // fold ((A-B)+(C-A)) -> (C-B)
2346
3.62M
  if (N0.getOpcode() == ISD::SUB && 
N1.getOpcode() == ISD::SUB12.9k
&&
2347
3.62M
      
N0.getOperand(0) == N1.getOperand(1)353
)
2348
10
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2349
10
                       N0.getOperand(1));
2350
3.62M
2351
3.62M
  // fold ((A-B)+(B-C)) -> (A-C)
2352
3.62M
  if (N0.getOpcode() == ISD::SUB && 
N1.getOpcode() == ISD::SUB12.9k
&&
2353
3.62M
      
N0.getOperand(1) == N1.getOperand(0)343
)
2354
12
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2355
12
                       N1.getOperand(1));
2356
3.62M
2357
3.62M
  // fold (A+(B-(A+C))) to (B-C)
2358
3.62M
  if (N1.getOpcode() == ISD::SUB && 
N1.getOperand(1).getOpcode() == ISD::ADD4.49k
&&
2359
3.62M
      
N0 == N1.getOperand(1).getOperand(0)199
)
2360
29
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2361
29
                       N1.getOperand(1).getOperand(1));
2362
3.62M
2363
3.62M
  // fold (A+(B-(C+A))) to (B-C)
2364
3.62M
  if (N1.getOpcode() == ISD::SUB && 
N1.getOperand(1).getOpcode() == ISD::ADD4.47k
&&
2365
3.62M
      
N0 == N1.getOperand(1).getOperand(1)170
)
2366
2
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2367
2
                       N1.getOperand(1).getOperand(0));
2368
3.62M
2369
3.62M
  // fold (A+((B-A)+or-C)) to (B+or-C)
2370
3.62M
  if ((N1.getOpcode() == ISD::SUB || 
N1.getOpcode() == ISD::ADD3.62M
) &&
2371
3.62M
      
N1.getOperand(0).getOpcode() == ISD::SUB23.4k
&&
2372
3.62M
      
N0 == N1.getOperand(0).getOperand(1)467
)
2373
17
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2374
17
                       N1.getOperand(1));
2375
3.62M
2376
3.62M
  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2377
3.62M
  if (N0.getOpcode() == ISD::SUB && 
N1.getOpcode() == ISD::SUB12.8k
) {
2378
331
    SDValue N00 = N0.getOperand(0);
2379
331
    SDValue N01 = N0.getOperand(1);
2380
331
    SDValue N10 = N1.getOperand(0);
2381
331
    SDValue N11 = N1.getOperand(1);
2382
331
2383
331
    if (isConstantOrConstantVector(N00) || 
isConstantOrConstantVector(N10)323
)
2384
10
      return DAG.getNode(ISD::SUB, DL, VT,
2385
10
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2386
10
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2387
3.62M
  }
2388
3.62M
2389
3.62M
  // fold (add (umax X, C), -C) --> (usubsat X, C)
2390
3.62M
  if (N0.getOpcode() == ISD::UMAX && 
hasOperation(ISD::USUBSAT, VT)164
) {
2391
2.18k
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2392
2.18k
      return (!Max && 
!Op28
) ||
2393
2.18k
             
(2.15k
Max2.15k
&&
Op2.15k
&&
Max->getAPIntValue() == (-Op->getAPIntValue())2.15k
);
2394
2.18k
    };
2395
132
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2396
132
                                  /*AllowUndefs*/ true))
2397
132
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2398
132
                         N0.getOperand(1));
2399
3.62M
  }
2400
3.62M
2401
3.62M
  if (SimplifyDemandedBits(SDValue(N, 0)))
2402
6.75k
    return SDValue(N, 0);
2403
3.61M
2404
3.61M
  if (isOneOrOneSplat(N1)) {
2405
148k
    // fold (add (xor a, -1), 1) -> (sub 0, a)
2406
148k
    if (isBitwiseNot(N0))
2407
9
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2408
9
                         N0.getOperand(0));
2409
148k
2410
148k
    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2411
148k
    if (N0.getOpcode() == ISD::ADD ||
2412
148k
        
N0.getOpcode() == ISD::UADDO141k
||
2413
148k
        
N0.getOpcode() == ISD::SADDO141k
) {
2414
7.12k
      SDValue A, Xor;
2415
7.12k
2416
7.12k
      if (isBitwiseNot(N0.getOperand(0))) {
2417
7
        A = N0.getOperand(1);
2418
7
        Xor = N0.getOperand(0);
2419
7.11k
      } else if (isBitwiseNot(N0.getOperand(1))) {
2420
3
        A = N0.getOperand(0);
2421
3
        Xor = N0.getOperand(1);
2422
3
      }
2423
7.12k
2424
7.12k
      if (Xor)
2425
10
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2426
148k
    }
2427
148k
2428
148k
    // Look for:
2429
148k
    //   add (add x, y), 1
2430
148k
    // And if the target does not like this form then turn into:
2431
148k
    //   sub y, (xor x, -1)
2432
148k
    if (!TLI.preferIncOfAddToSubOfNot(VT) && 
N0.hasOneUse()13.1k
&&
2433
148k
        
N0.getOpcode() == ISD::ADD621
) {
2434
168
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2435
168
                                DAG.getAllOnesConstant(DL, VT));
2436
168
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2437
168
    }
2438
3.61M
  }
2439
3.61M
2440
3.61M
  // (x - y) + -1  ->  add (xor y, -1), x
2441
3.61M
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB1.53M
&&
2442
3.61M
      
isAllOnesOrAllOnesSplat(N1)10.8k
) {
2443
208
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2444
208
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2445
208
  }
2446
3.61M
2447
3.61M
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2448
2.14k
    return Combined;
2449
3.61M
2450
3.61M
  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2451
1.43k
    return Combined;
2452
3.61M
2453
3.61M
  return SDValue();
2454
3.61M
}
2455
2456
4.07M
/// Combine an ADD node: first the generic add-like folds, then the add/sub
/// specific helpers, and finally the conversion to OR when the operands have
/// no bits in common. Returns an empty SDValue if nothing applies.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  const bool OrAllowed = !LegalOperations || TLI.isOperationLegal(ISD::OR, VT);
  if (OrAllowed && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  return SDValue();
}
2478
2479
3.23k
/// Combine a saturating add node; the node's own opcode is reused whenever a
/// saturating node is rebuilt. Returns an empty SDValue if nothing applies.
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // Vector-specific simplifications.
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // (add_sat x, 0) -> x for an all-zeros build vector on either side.
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // (add_sat c1, c2) -> c3
    if (DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
                                        N1.getNode());
    // Otherwise canonicalize the constant to the RHS.
    return DAG.getNode(Opcode, DL, VT, N1, N0);
  }

  // (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // An unsigned saturating add that can never overflow is just an add.
  if (Opcode == ISD::UADDSAT &&
      DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
2521
2522
2.53M
/// Look through TRUNCATE, ZERO_EXTEND and "and with 1" wrappers around \p V
/// and return the underlying value if it is result #1 of an ADDCARRY,
/// SUBCARRY, UADDO or USUBO node that is legal or custom for the target and
/// is known to be 0 or 1. Returns an empty SDValue otherwise.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization,
  // remembering whether a mask with 1 was seen on the way.
  for (;;) {
    const unsigned Opc = V.getOpcode();
    if (Opc == ISD::TRUNCATE || Opc == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (Opc == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // Only result #1 can be a carry.
  if (V.getResNo() != 1)
    return SDValue();

  // It has to come from one of the carry-producing nodes.
  switch (V.getOpcode()) {
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
  case ISD::UADDO:
  case ISD::USUBO:
    break;
  default:
    return SDValue();
  }

  // The producing operation must be legal or custom for its result #0 type.
  EVT VT = V.getNode()->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // A masked value is usable no matter what kind of bool it is. Otherwise the
  // bool type has to be either 0 or 1 and not other values.
  if (Masked || TLI.getBooleanContents(V.getValueType()) ==
                    TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}
2563
2564
/// Given the operands of an add/sub operation, see if the 2nd operand is a
2565
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2566
/// the opcode and bypass the mask operation.
2567
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2568
7.39M
                                 SelectionDAG &DAG, const SDLoc &DL) {
2569
7.39M
  if (N1.getOpcode() != ISD::AND || 
!isOneOrOneSplat(N1->getOperand(1))64.2k
)
2570
7.38M
    return SDValue();
2571
4.82k
2572
4.82k
  EVT VT = N0.getValueType();
2573
4.82k
  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2574
4.67k
    return SDValue();
2575
147
2576
147
  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2577
147
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2578
147
  return DAG.getNode(IsAdd ? 
ISD::SUB142
:
ISD::ADD5
, DL, VT, N0, N1.getOperand(0));
2579
147
}
2580
2581
/// Helper for doing combines based on N0 and N1 being added to each other.
2582
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2583
7.23M
                                          SDNode *LocReference) {
2584
7.23M
  EVT VT = N0.getValueType();
2585
7.23M
  SDLoc DL(LocReference);
2586
7.23M
2587
7.23M
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2588
7.23M
  if (N1.getOpcode() == ISD::SHL && 
N1.getOperand(0).getOpcode() == ISD::SUB239k
&&
2589
7.23M
      
isNullOrNullSplat(N1.getOperand(0).getOperand(0))2.36k
)
2590
531
    return DAG.getNode(ISD::SUB, DL, VT, N0,
2591
531
                       DAG.getNode(ISD::SHL, DL, VT,
2592
531
                                   N1.getOperand(0).getOperand(1),
2593
531
                                   N1.getOperand(1)));
2594
7.23M
2595
7.23M
  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2596
142
    return V;
2597
7.23M
2598
7.23M
  // Look for:
2599
7.23M
  //   add (add x, 1), y
2600
7.23M
  // And if the target does not like this form then turn into:
2601
7.23M
  //   sub y, (xor x, -1)
2602
7.23M
  if (!TLI.preferIncOfAddToSubOfNot(VT) && 
N0.hasOneUse()205k
&&
2603
7.23M
      
N0.getOpcode() == ISD::ADD102k
&&
isOneOrOneSplat(N0.getOperand(1))3.33k
) {
2604
0
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2605
0
                              DAG.getAllOnesConstant(DL, VT));
2606
0
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2607
0
  }
2608
7.23M
2609
7.23M
  // Hoist one-use subtraction by non-opaque constant:
2610
7.23M
  //   (x - C) + y  ->  (x + y) - C
2611
7.23M
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2612
7.23M
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB4.38M
&&
2613
7.23M
      
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)14.1k
) {
2614
218
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2615
218
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2616
218
  }
2617
7.23M
  // Hoist one-use subtraction from non-opaque constant:
2618
7.23M
  //   (C - x) + y  ->  (y - x) + C
2619
7.23M
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB4.37M
&&
2620
7.23M
      
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)13.9k
) {
2621
1.79k
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2622
1.79k
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2623
1.79k
  }
2624
7.23M
2625
7.23M
  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2626
7.23M
  // rather than 'add 0/-1' (the zext should get folded).
2627
7.23M
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2628
7.23M
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2629
7.23M
      
N0.getOperand(0).getScalarValueSizeInBits() == 111.6k
&&
2630
7.23M
      
TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent281
) {
2631
218
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2632
218
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2633
218
  }
2634
7.23M
2635
7.23M
  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2636
7.23M
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2637
1.64k
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2638
1.64k
    if (TN->getVT() == MVT::i1) {
2639
642
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2640
642
                                 DAG.getConstant(1, DL, VT));
2641
642
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2642
642
    }
2643
7.22M
  }
2644
7.22M
2645
7.22M
  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2646
7.22M
  if (N1.getOpcode() == ISD::ADDCARRY && 
isNullConstant(N1.getOperand(1))71
&&
2647
7.22M
      
N1.getResNo() == 025
)
2648
24
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2649
24
                       N0, N1.getOperand(0), N1.getOperand(2));
2650
7.22M
2651
7.22M
  // (add X, Carry) -> (addcarry X, 0, Carry)
2652
7.22M
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2653
2.43M
    if (SDValue Carry = getAsCarry(TLI, N1))
2654
13
      return DAG.getNode(ISD::ADDCARRY, DL,
2655
13
                         DAG.getVTList(VT, Carry.getValueType()), N0,
2656
13
                         DAG.getConstant(0, DL, VT), Carry);
2657
7.22M
2658
7.22M
  return SDValue();
2659
7.22M
}
2660
2661
625
/// Combine an ADDC node. When the carry result is unused, the RHS is zero, or
/// the addition cannot overflow, the node is replaced by a plain value plus a
/// CARRY_FALSE glue node. Returns an empty SDValue if nothing applies.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize a lone constant to the RHS.
  if (isa<ConstantSDNode>(N0) && !isa<ConstantSDNode>(N1))
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}
2690
2691
/// Unconditionally invert the boolean \p V by XOR-ing it with a constant
/// chosen from the target's boolean contents for V's type: 1 for zero-or-one
/// and undefined contents, all-ones for zero-or-negative-one contents.
static SDValue flipBoolean(SDValue V, const SDLoc &DL,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = V.getValueType();

  SDValue FlipMask;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    FlipMask = DAG.getAllOnesConstant(DL, VT);
    break;
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    FlipMask = DAG.getConstant(1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, FlipMask);
}
2708
2709
/**
2710
 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2711
 * then the flip also occurs if computing the inverse is the same cost.
2712
 * This function returns an empty SDValue in case it cannot flip the boolean
2713
 * without increasing the cost of the computation. If you want to flip a boolean
2714
 * no matter what, use flipBoolean.
2715
 */
2716
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2717
                                  const TargetLowering &TLI,
2718
130k
                                  bool Force) {
2719
130k
  if (Force && 
isa<ConstantSDNode>(V)38
)
2720
2
    return flipBoolean(V, SDLoc(V), DAG, TLI);
2721
130k
2722
130k
  if (V.getOpcode() != ISD::XOR)
2723
129k
    return SDValue();
2724
573
2725
573
  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2726
573
  if (!Const)
2727
330
    return SDValue();
2728
243
2729
243
  EVT VT = V.getValueType();
2730
243
2731
243
  bool IsFlip = false;
2732
243
  switch(TLI.getBooleanContents(VT)) {
2733
243
    case TargetLowering::ZeroOrOneBooleanContent:
2734
25
      IsFlip = Const->isOne();
2735
25
      break;
2736
243
    case TargetLowering::ZeroOrNegativeOneBooleanContent:
2737
215
      IsFlip = Const->isAllOnesValue();
2738
215
      break;
2739
243
    case TargetLowering::UndefinedBooleanContent:
2740
3
      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2741
3
      break;
2742
243
  }
2743
243
2744
243
  if (IsFlip)
2745
243
    return V.getOperand(0);
2746
0
  if (Force)
2747
0
    return flipBoolean(V, SDLoc(V), DAG, TLI);
2748
0
  return SDValue();
2749
0
}
2750
2751
14.2k
SDValue DAGCombiner::visitADDO(SDNode *N) {
2752
14.2k
  SDValue N0 = N->getOperand(0);
2753
14.2k
  SDValue N1 = N->getOperand(1);
2754
14.2k
  EVT VT = N0.getValueType();
2755
14.2k
  bool IsSigned = (ISD::SADDO == N->getOpcode());
2756
14.2k
2757
14.2k
  EVT CarryVT = N->getValueType(1);
2758
14.2k
  SDLoc DL(N);
2759
14.2k
2760
14.2k
  // If the flag result is dead, turn this into an ADD.
2761
14.2k
  if (!N->hasAnyUseOfValue(1))
2762
612
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2763
612
                     DAG.getUNDEF(CarryVT));
2764
13.6k
2765
13.6k
  // canonicalize constant to RHS.
2766
13.6k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2767
13.6k
      
!DAG.isConstantIntBuildVectorOrConstantInt(N1)95
)
2768
8
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2769
13.6k
2770
13.6k
  // fold (addo x, 0) -> x + no carry out
2771
13.6k
  if (isNullOrNullSplat(N1))
2772
639
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2773
12.9k
2774
12.9k
  if (!IsSigned) {
2775
11.7k
    // If it cannot overflow, transform into an add.
2776
11.7k
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2777
196
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2778
196
                       DAG.getConstant(0, DL, CarryVT));
2779
11.5k
2780
11.5k
    // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
2781
11.5k
    if (isBitwiseNot(N0) && 
isOneOrOneSplat(N1)27
) {
2782
8
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2783
8
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
2784
8
      return CombineTo(N, Sub,
2785
8
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2786
8
    }
2787
11.5k
2788
11.5k
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
2789
273
      return Combined;
2790
11.2k
2791
11.2k
    if (SDValue Combined = visitUADDOLike(N1, N0, N))
2792
10
      return Combined;
2793
12.4k
  }
2794
12.4k
2795
12.4k
  return SDValue();
2796
12.4k
}
2797
2798
22.8k
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2799
22.8k
  EVT VT = N0.getValueType();
2800
22.8k
  if (VT.isVector())
2801
370
    return SDValue();
2802
22.4k
2803
22.4k
  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2804
22.4k
  // If Y + 1 cannot overflow.
2805
22.4k
  if (N1.getOpcode() == ISD::ADDCARRY && 
isNullConstant(N1.getOperand(1))759
) {
2806
406
    SDValue Y = N1.getOperand(0);
2807
406
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2808
406
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2809
283
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2810
283
                         N1.getOperand(2));
2811
22.1k
  }
2812
22.1k
2813
22.1k
  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2814
22.1k
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2815
16.3k
    if (SDValue Carry = getAsCarry(TLI, N1))
2816
0
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2817
0
                         DAG.getConstant(0, SDLoc(N), VT), Carry);
2818
22.1k
2819
22.1k
  return SDValue();
2820
22.1k
}
2821
2822
2.78k
SDValue DAGCombiner::visitADDE(SDNode *N) {
2823
2.78k
  SDValue N0 = N->getOperand(0);
2824
2.78k
  SDValue N1 = N->getOperand(1);
2825
2.78k
  SDValue CarryIn = N->getOperand(2);
2826
2.78k
2827
2.78k
  // canonicalize constant to RHS
2828
2.78k
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2829
2.78k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2830
2.78k
  if (N0C && 
!N1C54
)
2831
9
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2832
9
                       N1, N0, CarryIn);
2833
2.77k
2834
2.77k
  // fold (adde x, y, false) -> (addc x, y)
2835
2.77k
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2836
30
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2837
2.74k
2838
2.74k
  return SDValue();
2839
2.74k
}
2840
2841
41.6k
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2842
41.6k
  SDValue N0 = N->getOperand(0);
2843
41.6k
  SDValue N1 = N->getOperand(1);
2844
41.6k
  SDValue CarryIn = N->getOperand(2);
2845
41.6k
  SDLoc DL(N);
2846
41.6k
2847
41.6k
  // canonicalize constant to RHS
2848
41.6k
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2849
41.6k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2850
41.6k
  if (N0C && 
!N1C1.28k
)
2851
478
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2852
41.1k
2853
41.1k
  // fold (addcarry x, y, false) -> (uaddo x, y)
2854
41.1k
  if (isNullConstant(CarryIn)) {
2855
915
    if (!LegalOperations ||
2856
915
        
TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0))92
)
2857
915
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2858
40.2k
  }
2859
40.2k
2860
40.2k
  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2861
40.2k
  if (isNullConstant(N0) && 
isNullConstant(N1)742
) {
2862
608
    EVT VT = N0.getValueType();
2863
608
    EVT CarryVT = CarryIn.getValueType();
2864
608
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2865
608
    AddToWorklist(CarryExt.getNode());
2866
608
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2867
608
                                    DAG.getConstant(1, DL, VT)),
2868
608
                     DAG.getConstant(0, DL, CarryVT));
2869
608
  }
2870
39.6k
2871
39.6k
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2872
272
    return Combined;
2873
39.3k
2874
39.3k
  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2875
2
    return Combined;
2876
39.3k
2877
39.3k
  return SDValue();
2878
39.3k
}
2879
2880
/**
2881
 * If we are facing some sort of diamond carry propagation pattern, try to
2882
 * break it up to generate something like:
2883
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2884
 *
2885
 * The end result is usually an increase in operations required, but because the
2886
 * carry is now linearized, other transforms can kick in and optimize the DAG.
2887
 *
2888
 * Patterns typically look something like
2889
 *            (uaddo A, B)
2890
 *             /       \
2891
 *          Carry      Sum
2892
 *            |          \
2893
 *            | (addcarry *, 0, Z)
2894
 *            |       /
2895
 *             \   Carry
2896
 *              |   /
2897
 * (addcarry X, *, *)
2898
 *
2899
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2900
 * produce a combine with a single path for carry propagation.
2901
 */
2902
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2903
                                      SDValue X, SDValue Carry0, SDValue Carry1,
2904
2.01k
                                      SDNode *N) {
2905
2.01k
  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2906
0
    return SDValue();
2907
2.01k
  if (Carry1.getOpcode() != ISD::UADDO)
2908
585
    return SDValue();
2909
1.42k
2910
1.42k
  SDValue Z;
2911
1.42k
2912
1.42k
  /**
2913
1.42k
   * First look for a suitable Z. It will present itself in the form of
2914
1.42k
   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2915
1.42k
   */
2916
1.42k
  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2917
1.42k
      
isNullConstant(Carry0.getOperand(1))13
) {
2918
4
    Z = Carry0.getOperand(2);
2919
1.42k
  } else if (Carry0.getOpcode() == ISD::UADDO &&
2920
1.42k
             
isOneConstant(Carry0.getOperand(1))1.41k
) {
2921
3
    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2922
3
    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2923
1.41k
  } else {
2924
1.41k
    // We couldn't find a suitable Z.
2925
1.41k
    return SDValue();
2926
1.41k
  }
2927
7
2928
7
2929
7
  auto cancelDiamond = [&](SDValue A,SDValue B) {
2930
5
    SDLoc DL(N);
2931
5
    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2932
5
    Combiner.AddToWorklist(NewY.getNode());
2933
5
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2934
5
                       DAG.getConstant(0, DL, X.getValueType()),
2935
5
                       NewY.getValue(1));
2936
5
  };
2937
7
2938
7
  /**
2939
7
   *      (uaddo A, B)
2940
7
   *           |
2941
7
   *          Sum
2942
7
   *           |
2943
7
   * (addcarry *, 0, Z)
2944
7
   */
2945
7
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2946
4
    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2947
4
  }
2948
3
2949
3
  /**
2950
3
   * (addcarry A, 0, Z)
2951
3
   *         |
2952
3
   *        Sum
2953
3
   *         |
2954
3
   *  (uaddo *, B)
2955
3
   */
2956
3
  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2957
0
    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2958
0
  }
2959
3
2960
3
  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2961
1
    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2962
1
  }
2963
2
2964
2
  return SDValue();
2965
2
}
2966
2967
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2968
79.0k
                                       SDNode *N) {
2969
79.0k
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2970
79.0k
  if (isBitwiseNot(N0))
2971
38
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2972
5
      SDLoc DL(N);
2973
5
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2974
5
                                N0.getOperand(0), NotC);
2975
5
      return CombineTo(N, Sub,
2976
5
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2977
5
    }
2978
79.0k
2979
79.0k
  // Iff the flag result is dead:
2980
79.0k
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2981
79.0k
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2982
79.0k
  // or the dependency between the instructions.
2983
79.0k
  if ((N0.getOpcode() == ISD::ADD ||
2984
79.0k
       
(77.4k
N0.getOpcode() == ISD::UADDO77.4k
&&
N0.getResNo() == 08.12k
&&
2985
77.4k
        
N0.getValue(1) != CarryIn8.03k
)) &&
2986
79.0k
      
isNullConstant(N1)9.63k
&&
!N->hasAnyUseOfValue(1)6.58k
)
2987
264
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2988
264
                       N0.getOperand(0), N0.getOperand(1), CarryIn);
2989
78.7k
2990
78.7k
  /**
2991
78.7k
   * When one of the addcarry arguments is itself a carry, we may be facing
2992
78.7k
   * a diamond carry propagation. In which case we try to transform the DAG
2993
78.7k
   * to ensure linear carry propagation if that is possible.
2994
78.7k
   */
2995
78.7k
  if (auto Y = getAsCarry(TLI, N1)) {
2996
1.00k
    // Because both are carries, Y and Z can be swapped.
2997
1.00k
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2998
1
      return R;
2999
1.00k
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3000
4
      return R;
3001
78.7k
  }
3002
78.7k
3003
78.7k
  return SDValue();
3004
78.7k
}
3005
3006
// Since it may not be valid to emit a fold to zero for vector initializers
3007
// check if we can before folding.
3008
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3009
132
                             SelectionDAG &DAG, bool LegalOperations) {
3010
132
  if (!VT.isVector())
3011
41
    return DAG.getConstant(0, DL, VT);
3012
91
  if (!LegalOperations || 
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)20
)
3013
71
    return DAG.getConstant(0, DL, VT);
3014
20
  return SDValue();
3015
20
}
3016
3017
167k
SDValue DAGCombiner::visitSUB(SDNode *N) {
3018
167k
  SDValue N0 = N->getOperand(0);
3019
167k
  SDValue N1 = N->getOperand(1);
3020
167k
  EVT VT = N0.getValueType();
3021
167k
  SDLoc DL(N);
3022
167k
3023
167k
  // fold vector ops
3024
167k
  if (VT.isVector()) {
3025
23.9k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
3026
8
      return FoldedVOp;
3027
23.8k
3028
23.8k
    // fold (sub x, 0) -> x, vector edition
3029
23.8k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
3030
8
      return N0;
3031
167k
  }
3032
167k
3033
167k
  // fold (sub x, x) -> 0
3034
167k
  // FIXME: Refactor this and xor and other similar operations together.
3035
167k
  if (N0 == N1)
3036
42
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3037
167k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3038
167k
      
DAG.isConstantIntBuildVectorOrConstantInt(N1)59.7k
) {
3039
1
    // fold (sub c1, c2) -> c1-c2
3040
1
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3041
1
                                      N1.getNode());
3042
1
  }
3043
167k
3044
167k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
3045
11
    return NewSel;
3046
167k
3047
167k
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3048
167k
3049
167k
  // fold (sub x, c) -> (add x, -c)
3050
167k
  if (N1C) {
3051
5.00k
    return DAG.getNode(ISD::ADD, DL, VT, N0,
3052
5.00k
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3053
5.00k
  }
3054
162k
3055
162k
  if (isNullOrNullSplat(N0)) {
3056
19.3k
    unsigned BitWidth = VT.getScalarSizeInBits();
3057
19.3k
    // Right-shifting everything out but the sign bit followed by negation is
3058
19.3k
    // the same as flipping arithmetic/logical shift type without the negation:
3059
19.3k
    // -(X >>u 31) -> (X >>s 31)
3060
19.3k
    // -(X >>s 31) -> (X >>u 31)
3061
19.3k
    if (N1->getOpcode() == ISD::SRA || 
N1->getOpcode() == ISD::SRL19.1k
) {
3062
262
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3063
262
      if (ShiftAmt && 
ShiftAmt->getAPIntValue() == (BitWidth - 1)238
) {
3064
11
        auto NewSh = N1->getOpcode() == ISD::SRA ? 
ISD::SRL7
:
ISD::SRA4
;
3065
11
        if (!LegalOperations || 
TLI.isOperationLegal(NewSh, VT)0
)
3066
11
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3067
19.3k
      }
3068
262
    }
3069
19.3k
3070
19.3k
    // 0 - X --> 0 if the sub is NUW.
3071
19.3k
    if (N->getFlags().hasNoUnsignedWrap())
3072
2
      return N0;
3073
19.3k
3074
19.3k
    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3075
404
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3076
404
      // N1 must be 0 because negating the minimum signed value is undefined.
3077
404
      if (N->getFlags().hasNoSignedWrap())
3078
2
        return N0;
3079
402
3080
402
      // 0 - X --> X if X is 0 or the minimum signed value.
3081
402
      return N1;
3082
402
    }
3083
19.3k
  }
3084
162k
3085
162k
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3086
162k
  if (isAllOnesOrAllOnesSplat(N0))
3087
145
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3088
162k
3089
162k
  // fold (A - (0-B)) -> A+B
3090
162k
  if (N1.getOpcode() == ISD::SUB && 
isNullOrNullSplat(N1.getOperand(0))3.54k
)
3091
43
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3092
162k
3093
162k
  // fold A-(A-B) -> B
3094
162k
  if (N1.getOpcode() == ISD::SUB && 
N0 == N1.getOperand(0)3.50k
)
3095
1.89k
    return N1.getOperand(1);
3096
160k
3097
160k
  // fold (A+B)-A -> B
3098
160k
  if (N0.getOpcode() == ISD::ADD && 
N0.getOperand(0) == N16.04k
)
3099
13
    return N0.getOperand(1);
3100
160k
3101
160k
  // fold (A+B)-B -> A
3102
160k
  if (N0.getOpcode() == ISD::ADD && 
N0.getOperand(1) == N16.03k
)
3103
3
    return N0.getOperand(0);
3104
160k
3105
160k
  // fold (A+C1)-C2 -> A+(C1-C2)
3106
160k
  if (N0.getOpcode() == ISD::ADD &&
3107
160k
      
isConstantOrConstantVector(N1, /* NoOpaques */ true)6.02k
&&
3108
160k
      
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)231
) {
3109
25
    SDValue NewC = DAG.FoldConstantArithmetic(
3110
25
        ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3111
25
    assert(NewC && "Constant folding failed");
3112
25
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3113
25
  }
3114
160k
3115
160k
  // fold C2-(A+C1) -> (C2-C1)-A
3116
160k
  if (N1.getOpcode() == ISD::ADD) {
3117
2.58k
    SDValue N11 = N1.getOperand(1);
3118
2.58k
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3119
2.58k
        
isConstantOrConstantVector(N11, /* NoOpaques */ true)520
) {
3120
236
      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3121
236
                                                N11.getNode());
3122
236
      assert(NewC && "Constant folding failed");
3123
236
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3124
236
    }
3125
160k
  }
3126
160k
3127
160k
  // fold (A-C1)-C2 -> A-(C1+C2)
3128
160k
  if (N0.getOpcode() == ISD::SUB &&
3129
160k
      
isConstantOrConstantVector(N1, /* NoOpaques */ true)3.21k
&&
3130
160k
      
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)34
) {
3131
9
    SDValue NewC = DAG.FoldConstantArithmetic(
3132
9
        ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3133
9
    assert(NewC && "Constant folding failed");
3134
9
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3135
9
  }
3136
160k
3137
160k
  // fold (c1-A)-c2 -> (c1-c2)-A
3138
160k
  if (N0.getOpcode() == ISD::SUB &&
3139
160k
      
isConstantOrConstantVector(N1, /* NoOpaques */ true)3.20k
&&
3140
160k
      
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)25
) {
3141
9
    SDValue NewC = DAG.FoldConstantArithmetic(
3142
9
        ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
3143
9
    assert(NewC && "Constant folding failed");
3144
9
    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3145
9
  }
3146
160k
3147
160k
  // fold ((A+(B+or-C))-B) -> A+or-C
3148
160k
  if (N0.getOpcode() == ISD::ADD &&
3149
160k
      
(6.00k
N0.getOperand(1).getOpcode() == ISD::SUB6.00k
||
3150
6.00k
       
N0.getOperand(1).getOpcode() == ISD::ADD5.76k
) &&
3151
160k
      
N0.getOperand(1).getOperand(0) == N1387
)
3152
7
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3153
7
                       N0.getOperand(1).getOperand(1));
3154
159k
3155
159k
  // fold ((A+(C+B))-B) -> A+C
3156
159k
  if (N0.getOpcode() == ISD::ADD && 
N0.getOperand(1).getOpcode() == ISD::ADD5.99k
&&
3157
159k
      
N0.getOperand(1).getOperand(1) == N1148
)
3158
1
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3159
1
                       N0.getOperand(1).getOperand(0));
3160
159k
3161
159k
  // fold ((A-(B-C))-C) -> A-B
3162
159k
  if (N0.getOpcode() == ISD::SUB && 
N0.getOperand(1).getOpcode() == ISD::SUB3.19k
&&
3163
159k
      
N0.getOperand(1).getOperand(1) == N179
)
3164
3
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3165
3
                       N0.getOperand(1).getOperand(0));
3166
159k
3167
159k
  // fold (A-(B-C)) -> A+(C-B)
3168
159k
  if (N1.getOpcode() == ISD::SUB && 
N1.hasOneUse()1.60k
)
3169
636
    return DAG.getNode(ISD::ADD, DL, VT, N0,
3170
636
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3171
636
                                   N1.getOperand(0)));
3172
159k
3173
159k
  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3174
159k
  if (N1.getOpcode() == ISD::MUL && 
N1.hasOneUse()8.50k
) {
3175
7.49k
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3176
7.49k
        
isNullOrNullSplat(N1.getOperand(0).getOperand(0))52
) {
3177
16
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3178
16
                                N1.getOperand(0).getOperand(1),
3179
16
                                N1.getOperand(1));
3180
16
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3181
16
    }
3182
7.48k
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3183
7.48k
        
isNullOrNullSplat(N1.getOperand(1).getOperand(0))0
) {
3184
0
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3185
0
                                N1.getOperand(0),
3186
0
                                N1.getOperand(1).getOperand(1));
3187
0
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3188
0
    }
3189
159k
  }
3190
159k
3191
159k
  // If either operand of a sub is undef, the result is undef
3192
159k
  if (N0.isUndef())
3193
1
    return N0;
3194
159k
  if (N1.isUndef())
3195
7
    return N1;
3196
159k
3197
159k
  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3198
18
    return V;
3199
159k
3200
159k
  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3201
10
    return V;
3202
159k
3203
159k
  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3204
5
    return V;
3205
159k
3206
159k
  // (x - y) - 1  ->  add (xor y, -1), x
3207
159k
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB115k
&&
isOneOrOneSplat(N1)2.09k
) {
3208
4
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3209
4
                              DAG.getAllOnesConstant(DL, VT));
3210
4
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3211
4
  }
3212
159k
3213
159k
  // Look for:
3214
159k
  //   sub y, (xor x, -1)
3215
159k
  // And if the target does not like this form then turn into:
3216
159k
  //   add (add x, y), 1
3217
159k
  if (TLI.preferIncOfAddToSubOfNot(VT) && 
N1.hasOneUse()150k
&&
isBitwiseNot(N1)95.9k
) {
3218
244
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3219
244
    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3220
244
  }
3221
159k
3222
159k
  // Hoist one-use addition by non-opaque constant:
3223
159k
  //   (x + C) - y  ->  (x - y) + C
3224
159k
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::ADD115k
&&
3225
159k
      
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)3.03k
) {
3226
1.27k
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3227
1.27k
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3228
1.27k
  }
3229
157k
  // y - (x + C)  ->  (y - x) - C
3230
157k
  if (N1.hasOneUse() && 
N1.getOpcode() == ISD::ADD100k
&&
3231
157k
      
isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)684
) {
3232
53
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3233
53
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3234
53
  }
3235
157k
  // (x - C) - y  ->  (x - y) - C
3236
157k
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3237
157k
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB113k
&&
3238
157k
      
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)2.07k
) {
3239
22
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3240
22
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3241
22
  }
3242
157k
  // (C - x) - y  ->  C - (x + y)
3243
157k
  if (N0.hasOneUse() && 
N0.getOpcode() == ISD::SUB113k
&&
3244
157k
      
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)2.05k
) {
3245
55
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3246
55
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3247
55
  }
3248
157k
3249
157k
  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3250
157k
  // rather than 'sub 0/1' (the sext should get folded).
3251
157k
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3252
157k
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3253
157k
      
N1.getOperand(0).getScalarValueSizeInBits() == 13.18k
&&
3254
157k
      TLI.getBooleanContents(VT) ==
3255
366
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
3256
6
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3257
6
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3258
6
  }
3259
157k
3260
157k
  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3261
157k
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3262
62.5k
    if (N0.getOpcode() == ISD::XOR && 
N1.getOpcode() == ISD::SRA959
) {
3263
120
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3264
120
      SDValue S0 = N1.getOperand(0);
3265
120
      if ((X0 == S0 && 
X1 == N12
) ||
(118
X0 == N1118
&&
X1 == S089
)) {
3266
91
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
3267
91
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3268
91
          if (C->getAPIntValue() == (OpSizeInBits - 1))
3269
91
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3270
157k
      }
3271
120
    }
3272
62.5k
  }
3273
157k
3274
157k
  // If the relocation model supports it, consider symbol offsets.
3275
157k
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3276
29
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3277
0
      // fold (sub Sym, c) -> Sym-c
3278
0
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3279
0
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3280
0
                                    GA->getOffset() -
3281
0
                                        (uint64_t)N1C->getSExtValue());
3282
0
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
3283
0
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3284
0
        if (GA->getGlobal() == GB->getGlobal())
3285
0
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3286
0
                                 DL, VT);
3287
157k
    }
3288
157k
3289
157k
  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3290
157k
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3291
738
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3292
738
    if (TN->getVT() == MVT::i1) {
3293
54
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3294
54
                                 DAG.getConstant(1, DL, VT));
3295
54
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3296
54
    }
3297
157k
  }
3298
157k
3299
157k
  // Prefer an add for more folding potential and possibly better codegen:
3300
157k
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3301
157k
  if (!LegalOperations && 
N1.getOpcode() == ISD::SRL80.9k
&&
N1.hasOneUse()480
) {
3302
215
    SDValue ShAmt = N1.getOperand(1);
3303
215
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3304
215
    if (ShAmtC &&
3305
215
        
ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)214
) {
3306
18
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3307
18
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3308
18
    }
3309
157k
  }
3310
157k
3311
157k
  return SDValue();
3312
157k
}
3313
3314
3.57k
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3315
3.57k
  SDValue N0 = N->getOperand(0);
3316
3.57k
  SDValue N1 = N->getOperand(1);
3317
3.57k
  EVT VT = N0.getValueType();
3318
3.57k
  SDLoc DL(N);
3319
3.57k
3320
3.57k
  // fold vector ops
3321
3.57k
  if (VT.isVector()) {
3322
3.35k
    // TODO SimplifyVBinOp
3323
3.35k
3324
3.35k
    // fold (sub_sat x, 0) -> x, vector edition
3325
3.35k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
3326
14
      return N0;
3327
3.56k
  }
3328
3.56k
3329
3.56k
  // fold (sub_sat x, undef) -> 0
3330
3.56k
  if (N0.isUndef() || N1.isUndef())
3331
0
    return DAG.getConstant(0, DL, VT);
3332
3.56k
3333
3.56k
  // fold (sub_sat x, x) -> 0
3334
3.56k
  if (N0 == N1)
3335
28
    return DAG.getConstant(0, DL, VT);
3336
3.53k
3337
3.53k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3338
3.53k
      
DAG.isConstantIntBuildVectorOrConstantInt(N1)3
) {
3339
0
    // fold (sub_sat c1, c2) -> c3
3340
0
    return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3341
0
                                      N1.getNode());
3342
0
  }
3343
3.53k
3344
3.53k
  // fold (sub_sat x, 0) -> x
3345
3.53k
  if (isNullConstant(N1))
3346
14
    return N0;
3347
3.52k
3348
3.52k
  return SDValue();
3349
3.52k
}
3350
3351
128
SDValue DAGCombiner::visitSUBC(SDNode *N) {
3352
128
  SDValue N0 = N->getOperand(0);
3353
128
  SDValue N1 = N->getOperand(1);
3354
128
  EVT VT = N0.getValueType();
3355
128
  SDLoc DL(N);
3356
128
3357
128
  // If the flag result is dead, turn this into a SUB.
3358
128
  if (!N->hasAnyUseOfValue(1))
3359
24
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3360
24
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3361
104
3362
104
  // fold (subc x, x) -> 0 + no borrow
3363
104
  if (N0 == N1)
3364
0
    return CombineTo(N, DAG.getConstant(0, DL, VT),
3365
0
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3366
104
3367
104
  // fold (subc x, 0) -> x + no borrow
3368
104
  if (isNullConstant(N1))
3369
0
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3370
104
3371
104
  // Canonicalize (subc -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3372
104
  if (isAllOnesConstant(N0))
3373
0
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3374
0
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3375
104
3376
104
  return SDValue();
3377
104
}
3378
3379
6.09k
SDValue DAGCombiner::visitSUBO(SDNode *N) {
3380
6.09k
  SDValue N0 = N->getOperand(0);
3381
6.09k
  SDValue N1 = N->getOperand(1);
3382
6.09k
  EVT VT = N0.getValueType();
3383
6.09k
  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3384
6.09k
3385
6.09k
  EVT CarryVT = N->getValueType(1);
3386
6.09k
  SDLoc DL(N);
3387
6.09k
3388
6.09k
  // If the flag result is dead, turn this into a SUB.
3389
6.09k
  if (!N->hasAnyUseOfValue(1))
3390
59
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3391
59
                     DAG.getUNDEF(CarryVT));
3392
6.03k
3393
6.03k
  // fold (subo x, x) -> 0 + no borrow
3394
6.03k
  if (N0 == N1)
3395
55
    return CombineTo(N, DAG.getConstant(0, DL, VT),
3396
55
                     DAG.getConstant(0, DL, CarryVT));
3397
5.98k
3398
5.98k
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3399
5.98k
3400
5.98k
  // fold (ssubo x, c) -> (saddo x, -c)
3401
5.98k
  if (IsSigned && 
N1C667
&&
!N1C->getAPIntValue().isMinSignedValue()153
) {
3402
145
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3403
145
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3404
145
  }
3405
5.83k
3406
5.83k
  // fold (subo x, 0) -> x + no borrow
3407
5.83k
  if (isNullOrNullSplat(N1))
3408
194
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3409
5.64k
3410
5.64k
  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3411
5.64k
  if (!IsSigned && 
isAllOnesOrAllOnesSplat(N0)5.12k
)
3412
15
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3413
15
                     DAG.getConstant(0, DL, CarryVT));
3414
5.62k
3415
5.62k
  return SDValue();
3416
5.62k
}
3417
3418
102
/// Combine an ISD::SUBE node (subtract with glued carry-in).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  // Only one fold here: when the incoming carry is the CARRY_FALSE marker the
  // node is just a SUBC of the first two operands.
  if (N->getOperand(2).getOpcode() != ISD::CARRY_FALSE)
    return SDValue();

  SDValue Minuend = N->getOperand(0);
  SDValue Subtrahend = N->getOperand(1);
  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), Minuend, Subtrahend);
}
3429
3430
1.59k
/// Combine an ISD::SUBCARRY node (subtract with a carry-in operand).
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  // The only fold is (subcarry x, y, 0) -> (usubo x, y); without a constant
  // zero carry-in there is nothing to do.
  if (!isNullConstant(N->getOperand(2)))
    return SDValue();

  // Once operations must be legal, USUBO may only be formed if the target
  // handles it (legal or custom) for the result type.
  if (LegalOperations &&
      !TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
    return SDValue();

  SDValue Minuend = N->getOperand(0);
  SDValue Subtrahend = N->getOperand(1);
  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), Minuend,
                     Subtrahend);
}
3444
3445
138k
/// Combine an ISD::MUL node. The folds below are tried in order and the first
/// one that applies wins; an empty SDValue means no change.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Record which operands are constants. For vectors only constant splats
  // count, and the opaque flags are never set on that path.
  bool N0IsConst = false, N0IsOpaqueConst = false;
  bool N1IsConst = false, N1IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  if (!VT.isVector()) {
    if (auto *C0 = dyn_cast<ConstantSDNode>(N0)) {
      N0IsConst = true;
      ConstValue0 = C0->getAPIntValue();
      N0IsOpaqueConst = C0->isOpaque();
    }
    if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
      N1IsConst = true;
      ConstValue1 = C1->getAPIntValue();
      N1IsOpaqueConst = C1->isOpaque();
    }
  } else {
    // Generic vector binop simplifications come first.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  }

  // (mul c1, c2) -> c1*c2, when neither constant is opaque.
  const bool BothPlainConsts =
      N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst;
  if (BothPlainConsts)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // Move a constant operand to the right-hand side. A constant vector does not
  // need to be a splat for this.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  if (N1IsConst) {
    // (mul x, 0) -> 0
    if (ConstValue1.isNullValue())
      return N1;
    // (mul x, 1) -> x
    if (ConstValue1.isOneValue())
      return N0;
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (mul x, -1) -> (sub 0, x)
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Zero, N0);
  }

  // (mul x, (1 << c)) -> (shl x, c). For vector types this is only done while
  // Level is no later than AfterLegalizeVectorOps.
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue Log2 = BuildLogBase2(N1, DL);
    EVT ShAmtVT = getShiftAmountTy(N0.getValueType());
    SDValue ShAmtOp = DAG.getZExtOrTrunc(Log2, DL, ShAmtVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, ShAmtOp);
  }

  // (mul x, -(1 << c)) -> (sub 0, (shl x, c))
  if (N1IsConst && !N1IsOpaqueConst) {
    const APInt NegC = -ConstValue1;
    if (NegC.isPowerOf2()) {
      unsigned Log2Val = NegC.logBase2();
      SDLoc DL(N);
      // FIXME: If the input is something that is easily negated (e.g. a
      // single-use add), we should put the negate there.
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue ShAmtOp =
          DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType()));
      SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, N0, ShAmtOp);
      return DAG.getNode(ISD::SUB, DL, VT, Zero, Shifted);
    }
  }

  // When the target asks for it, rewrite a multiply by (2^N + 1) or (2^N - 1)
  // as a shift plus an add/sub, negating the result for a negative constant:
  //   x * 33  --> (x << 5) + x
  //   x * 15  --> (x << 4) - x
  //   x * -33 --> -((x << 5) + x)
  //   x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    const APInt MulC = ConstValue1.abs();
    const APInt Below = MulC - 1;
    const APInt Above = MulC + 1;

    unsigned MathOp = ISD::DELETED_NODE;
    unsigned ShAmt = 0;
    if (Below.isPowerOf2()) {
      MathOp = ISD::ADD;
      ShAmt = Below.logBase2();
    } else if (Above.isPowerOf2()) {
      MathOp = ISD::SUB;
      ShAmt = Above.logBase2();
    }

    if (MathOp != ISD::DELETED_NODE) {
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue ShAmtOp = DAG.getConstant(ShAmt, DL, VT);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, ShAmtOp);
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (!ConstValue1.isNegative())
        return R;
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1), provided the shifted constant
  // itself comes back as a constant.
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one use.
  // The shift may be on either side of the multiply.
  {
    SDValue Sh, Y;
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0;
      Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1;
      Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2), when deemed profitable.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1)) {
    SDValue MulX = DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue MulC = DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, MulX, MulC);
  }

  // Finally, try reassociating the multiply.
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3614
3615
/// Return true if divmod libcall is available.
3616
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3617
6.63k
                                     const TargetLowering &TLI) {
3618
6.63k
  RTLIB::Libcall LC;
3619
6.63k
  EVT NodeType = Node->getValueType(0);
3620
6.63k
  if (!NodeType.isSimple())
3621
0
    return false;
3622
6.63k
  switch (NodeType.getSimpleVT().SimpleTy) {
3623
6.63k
  
default: return false0
; // No libcall for vector types.
3624
6.63k
  
case MVT::i8: LC= isSigned 0
?
RTLIB::SDIVREM_I80
:
RTLIB::UDIVREM_I80
; break;
3625
6.63k
  
case MVT::i16: LC= isSigned 4
?
RTLIB::SDIVREM_I162
:
RTLIB::UDIVREM_I162
; break;
3626
6.63k
  
case MVT::i32: LC= isSigned 5.66k
?
RTLIB::SDIVREM_I324.15k
:
RTLIB::UDIVREM_I321.50k
; break;
3627
6.63k
  
case MVT::i64: LC= isSigned 964
?
RTLIB::SDIVREM_I64292
:
RTLIB::UDIVREM_I64672
; break;
3628
6.63k
  
case MVT::i128: LC= isSigned 3
?
RTLIB::SDIVREM_I1283
:
RTLIB::UDIVREM_I1280
; break;
3629
6.63k
  }
3630
6.63k
3631
6.63k
  return TLI.getLibcallName(LC) != nullptr;
3632
6.63k
}
3633
3634
/// Issue divrem if both quotient and remainder are needed.
3635
12.5k
SDValue DAGCombiner::useDivRem(SDNode *Node) {
3636
12.5k
  if (Node->use_empty())
3637
0
    return SDValue(); // This is a dead node, leave it alone.
3638
12.5k
3639
12.5k
  unsigned Opcode = Node->getOpcode();
3640
12.5k
  bool isSigned = (Opcode == ISD::SDIV) || 
(Opcode == ISD::SREM)8.85k
;
3641
12.5k
  unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM6.56k
:
ISD::UDIVREM6.00k
;
3642
12.5k
3643
12.5k
  // DivMod lib calls can still work on non-legal types if using lib-calls.
3644
12.5k
  EVT VT = Node->getValueType(0);
3645
12.5k
  if (VT.isVector() || 
!VT.isInteger()12.0k
)
3646
513
    return SDValue();
3647
12.0k
3648
12.0k
  if (!TLI.isTypeLegal(VT) && 
!TLI.isOperationCustom(DivRemOpc, VT)987
)
3649
947
    return SDValue();
3650
11.1k
3651
11.1k
  // If DIVREM is going to get expanded into a libcall,
3652
11.1k
  // but there is no libcall available, then don't combine.
3653
11.1k
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3654
11.1k
      
!isDivRemLibcallAvailable(Node, isSigned, TLI)6.63k
)
3655
5.95k
    return SDValue();
3656
5.15k
3657
5.15k
  // If div is legal, it's better to do the normal expansion
3658
5.15k
  unsigned OtherOpcode = 0;
3659
5.15k
  if ((Opcode == ISD::SDIV) || 
(Opcode == ISD::UDIV)4.34k
) {
3660
2.83k
    OtherOpcode = isSigned ? 
ISD::SREM807
:
ISD::UREM2.03k
;
3661
2.83k
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
3662
1.05k
      return SDValue();
3663
2.31k
  } else {
3664
2.31k
    OtherOpcode = isSigned ? 
ISD::SDIV743
:
ISD::UDIV1.57k
;
3665
2.31k
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3666
403
      return SDValue();
3667
3.70k
  }
3668
3.70k
3669
3.70k
  SDValue Op0 = Node->getOperand(0);
3670
3.70k
  SDValue Op1 = Node->getOperand(1);
3671
3.70k
  SDValue combined;
3672
3.70k
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3673
8.09k
         UE = Op0.getNode()->use_end(); UI != UE; 
++UI4.39k
) {
3674
4.39k
    SDNode *User = *UI;
3675
4.39k
    if (User == Node || 
User->getOpcode() == ISD::DELETED_NODE698
||
3676
4.39k
        
User->use_empty()698
)
3677
3.70k
      continue;
3678
698
    // Convert the other matching node(s), too;
3679
698
    // otherwise, the DIVREM may get target-legalized into something
3680
698
    // target-specific that we won't be able to recognize.
3681
698
    unsigned UserOpc = User->getOpcode();
3682
698
    if ((UserOpc == Opcode || 
UserOpc == OtherOpcode609
||
UserOpc == DivRemOpc361
) &&
3683
698
        
User->getOperand(0) == Op0359
&&
3684
698
        
User->getOperand(1) == Op1189
) {
3685
181
      if (!combined) {
3686
181
        if (UserOpc == OtherOpcode) {
3687
181
          SDVTList VTs = DAG.getVTList(VT, VT);
3688
181
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3689
181
        } else 
if (0
UserOpc == DivRemOpc0
) {
3690
0
          combined = SDValue(User, 0);
3691
0
        } else {
3692
0
          assert(UserOpc == Opcode);
3693
0
          continue;
3694
0
        }
3695
181
      }
3696
181
      if (UserOpc == ISD::SDIV || 
UserOpc == ISD::UDIV89
)
3697
145
        CombineTo(User, combined);
3698
36
      else if (UserOpc == ISD::SREM || 
UserOpc == ISD::UREM22
)
3699
36
        CombineTo(User, combined.getValue(1));
3700
181
    }
3701
698
  }
3702
3.70k
  return combined;
3703
3.70k
}
3704
3705
18.1k
/// Simplifications shared by the div and rem combines. \p N is treated as a
/// division when its opcode is SDIV or UDIV and as a remainder otherwise.
/// Returns the simplified value, or an empty SDValue if none applies.
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue Dividend = N->getOperand(0);
  SDValue Divisor = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const unsigned Opc = N->getOpcode();
  const bool IsDiv = Opc == ISD::SDIV || Opc == ISD::UDIV;

  // X / undef -> undef      X % undef -> undef
  // X / 0     -> undef      X % 0     -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {Dividend, Divisor}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0          undef % X -> 0
  if (Dividend.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0              0 % X -> 0
  if (ConstantSDNode *DividendC = isConstOrConstSplat(Dividend))
    if (DividendC->isNullValue())
      return Dividend;

  // X / X -> 1              X % X -> 0
  if (Dividend == Divisor)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X              X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  ConstantSDNode *DivisorC = isConstOrConstSplat(Divisor);
  const bool DivisorIsOne = DivisorC && DivisorC->isOne();
  if (DivisorIsOne || VT.getScalarType() == MVT::i1) {
    if (IsDiv)
      return Dividend;
    return DAG.getConstant(0, DL, VT);
  }

  return SDValue();
}
3750
3751
6.06k
/// Combine an ISD::SDIV node. Returns the replacement value, or an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  // Only return when folding actually produced a value. If it did not, keep
  // going so the remaining simplifications below still get a chance to run,
  // matching what visitUDIV and visitREM do for their constant folds.
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    if (SDValue Folded =
            DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C))
      return Folded;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  // Simplifications shared with the other div/rem opcodes.
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
3813
3814
6.57k
/// Try to replace a signed division of \p N0 by \p N1 with a cheaper node
/// sequence. \p N supplies the result type, debug location and flags. Returns
/// the replacement, or an empty SDValue if none was built.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  const unsigned EltBits = VT.getScalarSizeInBits();

  // True for a non-zero, non-opaque constant whose value or whose negation is
  // a power of two. Used below on a scalar or on every element of a vector.
  auto IsPlusOrMinusPow2 = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    const APInt &Val = C->getAPIntValue();
    return Val.isPowerOf2() || (-Val).isPowerOf2();
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() &&
      ISD::matchUnaryPredicate(N1, IsPlusOrMinusPow2)) {
    // Give the target-specific implementation the first shot.
    if (SDValue TargetSeq = BuildSDIVPow2(N))
      return TargetSeq;

    // Constants derived from the divisor: Log2 = cttz(N1) and
    // RoundShift = EltBits - Log2. Give up unless RoundShift comes back as a
    // constant.
    EVT ShAmtVT = getShiftAmountTy(N0.getValueType());
    SDValue WidthC = DAG.getConstant(EltBits, DL, ShAmtVT);
    SDValue Log2 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    Log2 = DAG.getZExtOrTrunc(Log2, DL, ShAmtVT);
    SDValue RoundShift = DAG.getNode(ISD::SUB, DL, ShAmtVT, WidthC, Log2);
    if (!isConstantOrConstantVector(RoundShift))
      return SDValue();

    // Splat the sign bit of the dividend across the whole value.
    SDValue SignSplat =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(EltBits - 1, DL, ShAmtVT));
    AddToWorklist(SignSplat.getNode());

    // Bias = (N0 < 0) ? abs2 - 1 : 0, added before the arithmetic shift.
    SDValue Bias = DAG.getNode(ISD::SRL, DL, VT, SignSplat, RoundShift);
    AddToWorklist(Bias.getNode());
    SDValue Biased = DAG.getNode(ISD::ADD, DL, VT, N0, Bias);
    AddToWorklist(Biased.getNode());
    SDValue Quot = DAG.getNode(ISD::SRA, DL, VT, Biased, Log2);
    AddToWorklist(Quot.getNode());

    // Divisors of 1 and -1 bypass the shift sequence and select the dividend
    // itself; the -1 case is then negated by the final select below.
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue DivisorIsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue DivisorIsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue DivisorIsTrivial =
        DAG.getNode(ISD::OR, DL, CCVT, DivisorIsOne, DivisorIsAllOnes);
    Quot = DAG.getSelect(DL, VT, DivisorIsTrivial, N0, Quot);

    // A positive divisor is done at this point; a negative divisor needs the
    // quotient negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue NegQuot = DAG.getNode(ISD::SUB, DL, VT, Zero, Quot);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue DivisorIsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    return DAG.getSelect(DL, VT, DivisorIsNeg, NegQuot, Quot);
  }

  // For a constant divisor where the target says integer divide is not cheap,
  // let BuildSDIV emit an alternate sequence. Targets may check function
  // attributes for size/speed trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr)) {
    if (SDValue AltSeq = BuildSDIV(N))
      return AltSeq;
  }

  return SDValue();
}
3894
3895
4.95k
/// Combine an ISD::UDIV node. Returns the replacement value, or an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue Dividend = N->getOperand(0);
  SDValue Divisor = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // Generic vector binop simplifications come first.
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  SDLoc DL(N);

  // (udiv c1, c2) -> c1/c2, if the constants fold.
  ConstantSDNode *DividendC = isConstOrConstSplat(Dividend);
  ConstantSDNode *DivisorC = isConstOrConstSplat(Divisor);
  if (DividendC && DivisorC) {
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    DividendC, DivisorC))
      return Folded;
  }

  // (udiv X, -1) -> select(X == -1, 1, 0)
  if (DivisorC && DivisorC->getAPIntValue().isAllOnesValue()) {
    SDValue IsAllOnes =
        DAG.getSetCC(DL, CCVT, Dividend, Divisor, ISD::SETEQ);
    return DAG.getSelect(DL, VT, IsAllOnes, DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));
  }

  // Simplifications shared with the other div/rem opcodes.
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue Quot = visitUDIVLike(Dividend, Divisor, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    SDNode *RemNode =
        DAG.getNodeIfExists(ISD::UREM, N->getVTList(), {Dividend, Divisor});
    if (RemNode) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Quot, Divisor);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Dividend, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return Quot;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!DivisorC || TLI.isIntDivCheap(N->getValueType(0), Attr)) {
    if (SDValue DivRem = useDivRem(N))
      return DivRem;
  }

  return SDValue();
}
3951
3952
5.49k
/// Try to replace an unsigned division of \p N0 by \p N1 with a cheaper node
/// sequence. \p N supplies the result type and debug location. Returns the
/// replacement, or an empty SDValue if none was built.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // (udiv x, (1 << c)) -> (srl x, c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue Log2 = BuildLogBase2(N1, DL);
    AddToWorklist(Log2.getNode());

    EVT ShAmtVT = getShiftAmountTy(N0.getValueType());
    SDValue ShAmtOp = DAG.getZExtOrTrunc(Log2, DL, ShAmtVT);
    AddToWorklist(ShAmtOp.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, ShAmtOp);
  }

  // (udiv x, (shl c, y)) -> (srl x, (add y, log2(c))) iff c is a power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue ShlBase = N1.getOperand(0);
    if (isConstantOrConstantVector(ShlBase, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(ShlBase)) {
      SDValue Log2 = BuildLogBase2(ShlBase, DL);
      AddToWorklist(Log2.getNode());

      // The sum is formed in the type of the original shift amount.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Log2InAddVT = DAG.getZExtOrTrunc(Log2, DL, ADDVT);
      AddToWorklist(Log2InAddVT.getNode());
      SDValue TotalShift =
          DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Log2InAddVT);
      AddToWorklist(TotalShift.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, TotalShift);
    }
  }

  // (udiv x, c) -> alternate sequence from BuildUDIV, when the target says
  // integer divide is not cheap.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr)) {
    if (SDValue AltSeq = BuildUDIV(N))
      return AltSeq;
  }

  return SDValue();
}
3994
3995
// handles ISD::SREM and ISD::UREM
3996
7.21k
SDValue DAGCombiner::visitREM(SDNode *N) {
3997
7.21k
  unsigned Opcode = N->getOpcode();
3998
7.21k
  SDValue N0 = N->getOperand(0);
3999
7.21k
  SDValue N1 = N->getOperand(1);
4000
7.21k
  EVT VT = N->getValueType(0);
4001
7.21k
  EVT CCVT = getSetCCResultType(VT);
4002
7.21k
4003
7.21k
  bool isSigned = (Opcode == ISD::SREM);
4004
7.21k
  SDLoc DL(N);
4005
7.21k
4006
7.21k
  // fold (rem c1, c2) -> c1%c2
4007
7.21k
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
4008
7.21k
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4009
7.21k
  if (N0C && 
N1C144
)
4010
2
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
4011
2
      return Folded;
4012
7.21k
  // fold (urem X, -1) -> select(X == -1, 0, x)
4013
7.21k
  if (!isSigned && 
N1C3.74k
&&
N1C->getAPIntValue().isAllOnesValue()1.26k
)
4014
8
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4015
8
                         DAG.getConstant(0, DL, VT), N0);
4016
7.20k
4017
7.20k
  if (SDValue V = simplifyDivRem(N, DAG))
4018
131
    return V;
4019
7.07k
4020
7.07k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
4021
14
    return NewSel;
4022
7.06k
4023
7.06k
  if (isSigned) {
4024
3.40k
    // If we know the sign bits of both operands are zero, strength reduce to a
4025
3.40k
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4026
3.40k
    if (DAG.SignBitIsZero(N1) && 
DAG.SignBitIsZero(N0)1.96k
)
4027
16
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4028
3.65k
  } else {
4029
3.65k
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4030
3.65k
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4031
706
      // fold (urem x, pow2) -> (and x, pow2-1)
4032
706
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4033
706
      AddToWorklist(Add.getNode());
4034
706
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4035
706
    }
4036
2.95k
    if (N1.getOpcode() == ISD::SHL &&
4037
2.95k
        
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))6
) {
4038
6
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4039
6
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4040
6
      AddToWorklist(Add.getNode());
4041
6
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4042
6
    }
4043
6.33k
  }
4044
6.33k
4045
6.33k
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4046
6.33k
4047
6.33k
  // If X/C can be simplified by the division-by-constant logic, lower
4048
6.33k
  // X%C to the equivalent of X-X/C*C.
4049
6.33k
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4050
6.33k
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
4051
6.33k
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4052
6.33k
  // combine will not return a DIVREM.  Regardless, checking cheapness here
4053
6.33k
  // makes sense since the simplification results in fatter code.
4054
6.33k
  if (DAG.isKnownNeverZero(N1) && 
!TLI.isIntDivCheap(VT, Attr)1.34k
) {
4055
1.26k
    SDValue OptimizedDiv =
4056
1.26k
        isSigned ? 
visitSDIVLike(N0, N1, N)638
:
visitUDIVLike(N0, N1, N)630
;
4057
1.26k
    if (OptimizedDiv.getNode()) {
4058
1.04k
      // If the equivalent Div node also exists, update its users.
4059
1.04k
      unsigned DivOpcode = isSigned ? 
ISD::SDIV540
:
ISD::UDIV504
;
4060
1.04k
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4061
38
                                                { N0, N1 }))
4062
38
        CombineTo(DivNode, OptimizedDiv);
4063
1.04k
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4064
1.04k
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4065
1.04k
      AddToWorklist(OptimizedDiv.getNode());
4066
1.04k
      AddToWorklist(Mul.getNode());
4067
1.04k
      return Sub;
4068
1.04k
    }
4069
5.29k
  }
4070
5.29k
4071
5.29k
  // sdiv, srem -> sdivrem
4072
5.29k
  if (SDValue DivRem = useDivRem(N))
4073
145
    return DivRem.getValue(1);
4074
5.14k
4075
5.14k
  return SDValue();
4076
5.14k
}
4077
4078
2.21k
/// Combine an ISD::MULHS node (the "mulhs" folds below). Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const bool IsVector = VT.isVector();

  // Vector form of (mulhs x, 0) -> 0, for an all-zeros build vector on either
  // side (the right-hand side is checked first).
  if (IsVector) {
    if (ISD::isBuildVectorAllZeros(RHS.getNode()))
      return RHS;
    if (ISD::isBuildVectorAllZeros(LHS.getNode()))
      return LHS;
  }

  // (mulhs x, 0) -> 0
  if (isNullConstant(RHS))
    return RHS;

  // (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(RHS)) {
    EVT LHSVT = LHS.getValueType();
    return DAG.getNode(ISD::SRA, DL, LHSVT, LHS,
                       DAG.getConstant(LHS.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(LHSVT)));
  }

  // (mulhs x, undef) -> 0
  if (LHS.isUndef() || RHS.isUndef())
    return DAG.getConstant(0, DL, VT);

  // For simple scalar types: if MUL is legal on the integer type twice as
  // wide, sign-extend both operands, multiply wide, shift the product right
  // by the original width and truncate back.
  if (!VT.isSimple() || IsVector)
    return SDValue();

  const unsigned SimpleSize = VT.getSimpleVT().getSizeInBits();
  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize * 2);
  if (!TLI.isOperationLegal(ISD::MUL, NewVT))
    return SDValue();

  SDValue WideLHS = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, LHS);
  SDValue WideRHS = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, RHS);
  SDValue WideMul = DAG.getNode(ISD::MUL, DL, NewVT, WideLHS, WideRHS);
  SDValue HighPart =
      DAG.getNode(ISD::SRL, DL, NewVT, WideMul,
                  DAG.getConstant(SimpleSize, DL, getShiftAmountTy(NewVT)));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, HighPart);
}
4124
4125
8.94k
/// Combine an ISD::MULHU node (the "mulhu" folds below). Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const bool IsVector = VT.isVector();

  // Vector form of (mulhu x, 0) -> 0, for an all-zeros build vector on either
  // side (the right-hand side is checked first).
  if (IsVector) {
    if (ISD::isBuildVectorAllZeros(RHS.getNode()))
      return RHS;
    if (ISD::isBuildVectorAllZeros(LHS.getNode()))
      return LHS;
  }

  // (mulhu x, 0) -> 0
  if (isNullConstant(RHS))
    return RHS;

  // (mulhu x, 1) -> 0
  if (isOneConstant(RHS))
    return DAG.getConstant(0, DL, LHS.getValueType());

  // (mulhu x, undef) -> 0
  if (LHS.isUndef() || RHS.isUndef())
    return DAG.getConstant(0, DL, VT);

  // (mulhu x, (1 << c)) -> (srl x, (bitwidth - c)), when SRL is available for
  // the type.
  if (isConstantOrConstantVector(RHS, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(RHS) && hasOperation(ISD::SRL, VT)) {
    const unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue Log2 = BuildLogBase2(RHS, DL);
    SDValue WidthC = DAG.getConstant(NumEltBits, DL, VT);
    SDValue SRLAmt = DAG.getNode(ISD::SUB, DL, VT, WidthC, Log2);
    EVT ShAmtVT = getShiftAmountTy(LHS.getValueType());
    SDValue ShAmtOp = DAG.getZExtOrTrunc(SRLAmt, DL, ShAmtVT);
    return DAG.getNode(ISD::SRL, DL, VT, LHS, ShAmtOp);
  }

  // For simple scalar types: if MUL is legal on the integer type twice as
  // wide, zero-extend both operands, multiply wide, shift the product right
  // by the original width and truncate back.
  if (!VT.isSimple() || IsVector)
    return SDValue();

  const unsigned SimpleSize = VT.getSimpleVT().getSizeInBits();
  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize * 2);
  if (!TLI.isOperationLegal(ISD::MUL, NewVT))
    return SDValue();

  SDValue WideLHS = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, LHS);
  SDValue WideRHS = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, RHS);
  SDValue WideMul = DAG.getNode(ISD::MUL, DL, NewVT, WideLHS, WideRHS);
  SDValue HighPart =
      DAG.getNode(ISD::SRL, DL, NewVT, WideMul,
                  DAG.getConstant(SimpleSize, DL, getShiftAmountTy(NewVT)));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, HighPart);
}
4180
4181
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4182
/// give the opcodes for the two computations that are being performed. Return
4183
/// true if a simplification was made.
4184
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4185
8.70k
                                                unsigned HiOp) {
4186
8.70k
  // If the high half is not needed, just compute the low half.
4187
8.70k
  bool HiExists = N->hasAnyUseOfValue(1);
4188
8.70k
  if (!HiExists && 
(58
!LegalOperations58
||
4189
58
                    
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0))7
)) {
4190
51
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4191
51
    return CombineTo(N, Res, Res);
4192
51
  }
4193
8.65k
4194
8.65k
  // If the low half is not needed, just compute the high half.
4195
8.65k
  bool LoExists = N->hasAnyUseOfValue(0);
4196
8.65k
  if (!LoExists && 
(2.36k
!LegalOperations2.36k
||
4197
2.36k
                    
TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1))1.00k
)) {
4198
1.35k
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4199
1.35k
    return CombineTo(N, Res, Res);
4200
1.35k
  }
4201
7.29k
4202
7.29k
  // If both halves are used, return as it is.
4203
7.29k
  if (LoExists && 
HiExists6.29k
)
4204
6.28k
    return SDValue();
4205
1.01k
4206
1.01k
  // If the two computed results can be simplified separately, separate them.
4207
1.01k
  if (LoExists) {
4208
7
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4209
7
    AddToWorklist(Lo.getNode());
4210
7
    SDValue LoOpt = combine(Lo.getNode());
4211
7
    if (LoOpt.getNode() && 
LoOpt.getNode() != Lo.getNode()0
&&
4212
7
        
(0
!LegalOperations0
||
4213
0
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4214
0
      return CombineTo(N, LoOpt, LoOpt);
4215
1.01k
  }
4216
1.01k
4217
1.01k
  if (HiExists) {
4218
1.00k
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4219
1.00k
    AddToWorklist(Hi.getNode());
4220
1.00k
    SDValue HiOpt = combine(Hi.getNode());
4221
1.00k
    if (HiOpt.getNode() && 
HiOpt != Hi0
&&
4222
1.00k
        
(0
!LegalOperations0
||
4223
0
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4224
0
      return CombineTo(N, HiOpt, HiOpt);
4225
1.01k
  }
4226
1.01k
4227
1.01k
  return SDValue();
4228
1.01k
}
4229
4230
889
/// Combine a signed two-result multiply. Result 0 is the low half of the
/// product and result 1 the high half.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  const EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // The widening transform below is only attempted for simple scalar types.
  if (!VT.isSimple() || VT.isVector())
    return SDValue();

  // If a multiply on the type twice as wide is legal, replace the two-result
  // multiply with one wide multiply and split its result into halves.
  const unsigned NarrowBits = VT.getSimpleVT().getSizeInBits();
  const EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), NarrowBits * 2);
  if (!TLI.isOperationLegal(ISD::MUL, WideVT))
    return SDValue();

  SDValue WideLHS =
      DAG.getNode(ISD::SIGN_EXTEND, DL, WideVT, N->getOperand(0));
  SDValue WideRHS =
      DAG.getNode(ISD::SIGN_EXTEND, DL, WideVT, N->getOperand(1));
  SDValue Product = DAG.getNode(ISD::MUL, DL, WideVT, WideLHS, WideRHS);

  // High half: shift the upper bits of the product down, then truncate.
  SDValue ShAmt = DAG.getConstant(NarrowBits, DL,
                                  getShiftAmountTy(Product.getValueType()));
  SDValue HiWide = DAG.getNode(ISD::SRL, DL, WideVT, Product, ShAmt);
  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, HiWide);

  // Low half: a plain truncate of the product.
  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Product);
  return CombineTo(N, Lo, Hi);
}
4260
4261
7.81k
/// Combine an unsigned two-result multiply. Result 0 is the low half of the
/// product and result 1 the high half.
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  const EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // The widening transform below is only attempted for simple scalar types.
  if (!VT.isSimple() || VT.isVector())
    return SDValue();

  // If a multiply on the type twice as wide is legal, replace the two-result
  // multiply with one wide multiply and split its result into halves.
  const unsigned NarrowBits = VT.getSimpleVT().getSizeInBits();
  const EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), NarrowBits * 2);
  if (!TLI.isOperationLegal(ISD::MUL, WideVT))
    return SDValue();

  SDValue WideLHS =
      DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(0));
  SDValue WideRHS =
      DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(1));
  SDValue Product = DAG.getNode(ISD::MUL, DL, WideVT, WideLHS, WideRHS);

  // High half: shift the upper bits of the product down, then truncate.
  SDValue ShAmt = DAG.getConstant(NarrowBits, DL,
                                  getShiftAmountTy(Product.getValueType()));
  SDValue HiWide = DAG.getNode(ISD::SRL, DL, WideVT, Product, ShAmt);
  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, HiWide);

  // Low half: a plain truncate of the product.
  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Product);
  return CombineTo(N, Lo, Hi);
}
4291
4292
1.52k
/// Combine a multiply-with-overflow node (SMULO or its unsigned counterpart).
SDValue DAGCombiner::visitMULO(SDNode *N) {
  // (mulo x, 2) -> (addo x, x)
  ConstantSDNode *Multiplier = isConstOrConstSplat(N->getOperand(1));
  if (Multiplier && Multiplier->getAPIntValue() == 2) {
    // Keep the signedness of the overflow check.
    const unsigned AddOpc =
        N->getOpcode() == ISD::SMULO ? ISD::SADDO : ISD::UADDO;
    return DAG.getNode(AddOpc, SDLoc(N), N->getVTList(), N->getOperand(0),
                       N->getOperand(0));
  }

  return SDValue();
}
4303
4304
41.7k
/// Combine an integer min/max node (SMIN, SMAX, UMIN or UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  const EVT VT = LHS.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue Folded = SimplifyVBinOp(N))
      return Folded;

  const unsigned Opc = N->getOpcode();

  // Two non-opaque constants: evaluate the operation right away.
  ConstantSDNode *LHSC = getAsNonOpaqueConstant(LHS);
  ConstantSDNode *RHSC = getAsNonOpaqueConstant(RHS);
  if (LHSC && RHSC)
    return DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, LHSC, RHSC);

  // Canonicalize a constant operand to the right-hand side.
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.getNode(Opc, SDLoc(N), VT, RHS, LHS);

  // If the sign bits of both operands are zero, flip between UMIN/UMAX and
  // SMIN/SMAX. Only do this if the current op isn't legal and the flipped
  // one is.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isOperationLegal(Opc, VT))
    return SDValue();

  auto SignBitKnownClear = [&](SDValue V) {
    return V.isUndef() || DAG.SignBitIsZero(V);
  };
  if (!SignBitKnownClear(LHS) || !SignBitKnownClear(RHS))
    return SDValue();

  // Map each min/max opcode to its counterpart of the other signedness.
  auto FlipSignedness = [](unsigned MinMaxOpc) -> unsigned {
    switch (MinMaxOpc) {
    case ISD::SMIN: return ISD::UMIN;
    case ISD::SMAX: return ISD::UMAX;
    case ISD::UMIN: return ISD::SMIN;
    case ISD::UMAX: return ISD::SMAX;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
  };

  const unsigned FlippedOpc = FlipSignedness(Opc);
  if (!TLI.isOperationLegal(FlippedOpc, VT))
    return SDValue();

  return DAG.getNode(FlippedOpc, SDLoc(N), VT, LHS, RHS);
}
4346
4347
/// If this is a bitwise logic instruction and both operands have the same
4348
/// opcode, try to sink the other opcode after the logic instruction.
4349
106k
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4350
106k
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4351
106k
  EVT VT = N0.getValueType();
4352
106k
  unsigned LogicOpcode = N->getOpcode();
4353
106k
  unsigned HandOpcode = N0.getOpcode();
4354
106k
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4355
106k
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
4356
106k
  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4357
106k
4358
106k
  // Bail early if none of these transforms apply.
4359
106k
  if (N0.getNumOperands() == 0)
4360
1.49k
    return SDValue();
4361
104k
4362
104k
  // FIXME: We should check number of uses of the operands to not increase
4363
104k
  //        the instruction count for all transforms.
4364
104k
4365
104k
  // Handle size-changing casts.
4366
104k
  SDValue X = N0.getOperand(0);
4367
104k
  SDValue Y = N1.getOperand(0);
4368
104k
  EVT XVT = X.getValueType();
4369
104k
  SDLoc DL(N);
4370
104k
  if (HandOpcode == ISD::ANY_EXTEND || 
HandOpcode == ISD::ZERO_EXTEND104k
||
4371
104k
      
HandOpcode == ISD::SIGN_EXTEND104k
) {
4372
602
    // If both operands have other uses, this transform would create extra
4373
602
    // instructions without eliminating anything.
4374
602
    if (!N0.hasOneUse() && 
!N1.hasOneUse()110
)
4375
23
      return SDValue();
4376
579
    // We need matching integer source types.
4377
579
    if (XVT != Y.getValueType())
4378
19
      return SDValue();
4379
560
    // Don't create an illegal op during or after legalization. Don't ever
4380
560
    // create an unsupported vector op.
4381
560
    if ((VT.isVector() || 
LegalOperations516
) &&
4382
560
        
!TLI.isOperationLegalOrCustom(LogicOpcode, XVT)386
)
4383
30
      return SDValue();
4384
530
    // Avoid infinite looping with PromoteIntBinOp.
4385
530
    // TODO: Should we apply desirable/legal constraints to all opcodes?
4386
530
    if (HandOpcode == ISD::ANY_EXTEND && 
LegalTypes340
&&
4387
530
        
!TLI.isTypeDesirableForOp(LogicOpcode, XVT)340
)
4388
332
      return SDValue();
4389
198
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4390
198
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4391
198
    return DAG.getNode(HandOpcode, DL, VT, Logic);
4392
198
  }
4393
104k
4394
104k
  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4395
104k
  if (HandOpcode == ISD::TRUNCATE) {
4396
1.96k
    // If both operands have other uses, this transform would create extra
4397
1.96k
    // instructions without eliminating anything.
4398
1.96k
    if (!N0.hasOneUse() && 
!N1.hasOneUse()152
)
4399
72
      return SDValue();
4400
1.89k
    // We need matching source types.
4401
1.89k
    if (XVT != Y.getValueType())
4402
53
      return SDValue();
4403
1.84k
    // Don't create an illegal op during or after legalization.
4404
1.84k
    if (LegalOperations && 
!TLI.isOperationLegal(LogicOpcode, XVT)399
)
4405
5
      return SDValue();
4406
1.83k
    // Be extra careful sinking truncate. If it's free, there's no benefit in
4407
1.83k
    // widening a binop. Also, don't create a logic op on an illegal type.
4408
1.83k
    if (TLI.isZExtFree(VT, XVT) && 
TLI.isTruncateFree(XVT, VT)554
)
4409
554
      return SDValue();
4410
1.28k
    if (!TLI.isTypeLegal(XVT))
4411
29
      return SDValue();
4412
1.25k
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4413
1.25k
    return DAG.getNode(HandOpcode, DL, VT, Logic);
4414
1.25k
  }
4415
102k
4416
102k
  // For binops SHL/SRL/SRA/AND:
4417
102k
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4418
102k
  if ((HandOpcode == ISD::SHL || 
HandOpcode == ISD::SRL100k
||
4419
102k
       
HandOpcode == ISD::SRA100k
||
HandOpcode == ISD::AND100k
) &&
4420
102k
      
N0.getOperand(1) == N1.getOperand(1)17.8k
) {
4421
343
    // If either operand has other uses, this transform is not an improvement.
4422
343
    if (!N0.hasOneUse() || 
!N1.hasOneUse()93
)
4423
280
      return SDValue();
4424
63
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4425
63
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4426
63
  }
4427
101k
4428
101k
  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4429
101k
  if (HandOpcode == ISD::BSWAP) {
4430
8
    // If either operand has other uses, this transform is not an improvement.
4431
8
    if (!N0.hasOneUse() || 
!N1.hasOneUse()4
)
4432
4
      return SDValue();
4433
4
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4434
4
    return DAG.getNode(HandOpcode, DL, VT, Logic);
4435
4
  }
4436
101k
4437
101k
  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4438
101k
  // Only perform this optimization up until type legalization, before
4439
101k
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4440
101k
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4441
101k
  // we don't want to undo this promotion.
4442
101k
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4443
101k
  // on scalars.
4444
101k
  if ((HandOpcode == ISD::BITCAST || 
HandOpcode == ISD::SCALAR_TO_VECTOR96.3k
) &&
4445
101k
       
Level <= AfterLegalizeTypes5.37k
) {
4446
602
    // Input types must be integer and the same.
4447
602
    if (XVT.isInteger() && 
XVT == Y.getValueType()133
) {
4448
126
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4449
126
      return DAG.getNode(HandOpcode, DL, VT, Logic);
4450
126
    }
4451
101k
  }
4452
101k
4453
101k
  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4454
101k
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4455
101k
  // If both shuffles use the same mask, and both shuffle within a single
4456
101k
  // vector, then it is worthwhile to move the swizzle after the operation.
4457
101k
  // The type-legalizer generates this pattern when loading illegal
4458
101k
  // vector types from memory. In many cases this allows additional shuffle
4459
101k
  // optimizations.
4460
101k
  // There are other cases where moving the shuffle after the xor/and/or
4461
101k
  // is profitable even if shuffles don't perform a swizzle.
4462
101k
  // If both shuffles use the same mask, and both shuffles have the same first
4463
101k
  // or second operand, then it might still be profitable to move the shuffle
4464
101k
  // after the xor/and/or operation.
4465
101k
  if (HandOpcode == ISD::VECTOR_SHUFFLE && 
Level < AfterLegalizeDAG2.15k
) {
4466
2.15k
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4467
2.15k
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4468
2.15k
    assert(X.getValueType() == Y.getValueType() &&
4469
2.15k
           "Inputs to shuffles are not the same type");
4470
2.15k
4471
2.15k
    // Check that both shuffles use the same mask. The masks are known to be of
4472
2.15k
    // the same length because the result vector type is the same.
4473
2.15k
    // Check also that shuffles have only one use to avoid introducing extra
4474
2.15k
    // instructions.
4475
2.15k
    if (!SVN0->hasOneUse() || 
!SVN1->hasOneUse()2.14k
||
4476
2.15k
        
!SVN0->getMask().equals(SVN1->getMask())2.03k
)
4477
1.93k
      return SDValue();
4478
218
4479
218
    // Don't try to fold this node if it requires introducing a
4480
218
    // build vector of all zeros that might be illegal at this stage.
4481
218
    SDValue ShOp = N0.getOperand(1);
4482
218
    if (LogicOpcode == ISD::XOR && 
!ShOp.isUndef()44
)
4483
44
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4484
218
4485
218
    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4486
218
    if (N0.getOperand(1) == N1.getOperand(1) && 
ShOp.getNode()42
) {
4487
42
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4488
42
                                  N0.getOperand(0), N1.getOperand(0));
4489
42
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4490
42
    }
4491
176
4492
176
    // Don't try to fold this node if it requires introducing a
4493
176
    // build vector of all zeros that might be illegal at this stage.
4494
176
    ShOp = N0.getOperand(0);
4495
176
    if (LogicOpcode == ISD::XOR && 
!ShOp.isUndef()30
)
4496
30
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4497
176
4498
176
    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4499
176
    if (N0.getOperand(0) == N1.getOperand(0) && 
ShOp.getNode()36
) {
4500
36
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4501
36
                                  N1.getOperand(1));
4502
36
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4503
36
    }
4504
99.5k
  }
4505
99.5k
4506
99.5k
  return SDValue();
4507
99.5k
}
4508
4509
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4510
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4511
969k
                                       const SDLoc &DL) {
4512
969k
  SDValue LL, LR, RL, RR, N0CC, N1CC;
4513
969k
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4514
969k
      
!isSetCCEquivalent(N1, RL, RR, N1CC)252k
)
4515
954k
    return SDValue();
4516
15.7k
4517
15.7k
  assert(N0.getValueType() == N1.getValueType() &&
4518
15.7k
         "Unexpected operand types for bitwise logic op");
4519
15.7k
  assert(LL.getValueType() == LR.getValueType() &&
4520
15.7k
         RL.getValueType() == RR.getValueType() &&
4521
15.7k
         "Unexpected operand types for setcc");
4522
15.7k
4523
15.7k
  // If we're here post-legalization or the logic op type is not i1, the logic
4524
15.7k
  // op type must match a setcc result type. Also, all folds require new
4525
15.7k
  // operations on the left and right operands, so those types must match.
4526
15.7k
  EVT VT = N0.getValueType();
4527
15.7k
  EVT OpVT = LL.getValueType();
4528
15.7k
  if (LegalOperations || 
VT.getScalarType() != MVT::i114.6k
)
4529
3.07k
    if (VT != getSetCCResultType(OpVT))
4530
34
      return SDValue();
4531
15.7k
  if (OpVT != RL.getValueType())
4532
496
    return SDValue();
4533
15.2k
4534
15.2k
  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4535
15.2k
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4536
15.2k
  bool IsInteger = OpVT.isInteger();
4537
15.2k
  if (LR == RR && 
CC0 == CC111.0k
&&
IsInteger10.5k
) {
4538
10.3k
    bool IsZero = isNullOrNullSplat(LR);
4539
10.3k
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4540
10.3k
4541
10.3k
    // All bits clear?
4542
10.3k
    bool AndEqZero = IsAnd && 
CC1 == ISD::SETEQ4.35k
&&
IsZero4.18k
;
4543
10.3k
    // All sign bits clear?
4544
10.3k
    bool AndGtNeg1 = IsAnd && 
CC1 == ISD::SETGT4.35k
&&
IsNeg111
;
4545
10.3k
    // Any bits set?
4546
10.3k
    bool OrNeZero = !IsAnd && 
CC1 == ISD::SETNE5.98k
&&
IsZero5.59k
;
4547
10.3k
    // Any sign bits set?
4548
10.3k
    bool OrLtZero = !IsAnd && 
CC1 == ISD::SETLT5.98k
&&
IsZero14
;
4549
10.3k
4550
10.3k
    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4551
10.3k
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4552
10.3k
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4553
10.3k
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4554
10.3k
    if (AndEqZero || 
AndGtNeg16.16k
||
OrNeZero6.15k
||
OrLtZero574
) {
4555
9.76k
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4556
9.76k
      AddToWorklist(Or.getNode());
4557
9.76k
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
4558
9.76k
    }
4559
567
4560
567
    // All bits set?
4561
567
    bool AndEqNeg1 = IsAnd && 
CC1 == ISD::SETEQ178
&&
IsNeg113
;
4562
567
    // All sign bits set?
4563
567
    bool AndLtZero = IsAnd && 
CC1 == ISD::SETLT178
&&
IsZero10
;
4564
567
    // Any bits clear?
4565
567
    bool OrNeNeg1 = !IsAnd && 
CC1 == ISD::SETNE389
&&
IsNeg17
;
4566
567
    // Any sign bits clear?
4567
567
    bool OrGtNeg1 = !IsAnd && 
CC1 == ISD::SETGT389
&&
IsNeg15
;
4568
567
4569
567
    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4570
567
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4571
567
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4572
567
    // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4573
567
    if (AndEqNeg1 || 
AndLtZero560
||
OrNeNeg1555
||
OrGtNeg1550
) {
4574
22
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4575
22
      AddToWorklist(And.getNode());
4576
22
      return DAG.getSetCC(DL, VT, And, LR, CC1);
4577
22
    }
4578
5.47k
  }
4579
5.47k
4580
5.47k
  // TODO: What is the 'or' equivalent of this fold?
4581
5.47k
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4582
5.47k
  if (IsAnd && 
LL == RL3.69k
&&
CC0 == CC1421
&&
OpVT.getScalarSizeInBits() > 1127
&&
4583
5.47k
      
IsInteger112
&&
CC0 == ISD::SETNE102
&&
4584
5.47k
      
(93
(93
isNullConstant(LR)93
&&
isAllOnesConstant(RR)0
) ||
4585
93
       (isAllOnesConstant(LR) && 
isNullConstant(RR)3
))) {
4586
3
    SDValue One = DAG.getConstant(1, DL, OpVT);
4587
3
    SDValue Two = DAG.getConstant(2, DL, OpVT);
4588
3
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4589
3
    AddToWorklist(Add.getNode());
4590
3
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4591
3
  }
4592
5.46k
4593
5.46k
  // Try more general transforms if the predicates match and the only user of
4594
5.46k
  // the compares is the 'and' or 'or'.
4595
5.46k
  if (IsInteger && 
TLI.convertSetCCLogicToBitwiseLogic(OpVT)4.54k
&&
CC0 == CC11.10k
&&
4596
5.46k
      
N0.hasOneUse()494
&&
N1.hasOneUse()473
) {
4597
473
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4598
473
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4599
473
    if ((IsAnd && 
CC1 == ISD::SETEQ189
) ||
(455
!IsAnd455
&&
CC1 == ISD::SETNE284
)) {
4600
90
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4601
90
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4602
90
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4603
90
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
4604
90
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4605
90
    }
4606
383
4607
383
    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4608
383
    // TODO - support non-uniform vector amounts.
4609
383
    if ((IsAnd && 
CC1 == ISD::SETNE171
) ||
(252
!IsAnd252
&&
CC1 == ISD::SETEQ212
)) {
4610
271
      // Match a shared variable operand and 2 non-opaque constant operands.
4611
271
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
4612
271
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
4613
271
      if (LL == RL && 
C070
&&
C124
&&
!C0->isOpaque()17
&&
!C1->isOpaque()17
) {
4614
17
        // Canonicalize larger constant as C0.
4615
17
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4616
14
          std::swap(C0, C1);
4617
17
4618
17
        // The difference of the constants must be a single bit.
4619
17
        const APInt &C0Val = C0->getAPIntValue();
4620
17
        const APInt &C1Val = C1->getAPIntValue();
4621
17
        if ((C0Val - C1Val).isPowerOf2()) {
4622
8
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4623
8
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4624
8
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4625
8
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4626
8
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4627
8
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4628
8
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
4629
8
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
4630
8
        }
4631
5.37k
      }
4632
271
    }
4633
383
  }
4634
5.37k
4635
5.37k
  // Canonicalize equivalent operands to LL == RL.
4636
5.37k
  if (LL == RR && 
LR == RL142
) {
4637
0
    CC1 = ISD::getSetCCSwappedOperands(CC1);
4638
0
    std::swap(RL, RR);
4639
0
  }
4640
5.37k
4641
5.37k
  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4642
5.37k
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4643
5.37k
  if (LL == RL && 
LR == RR687
) {
4644
97
    ISD::CondCode NewCC = IsAnd ? 
ISD::getSetCCAndOperation(CC0, CC1, IsInteger)45
4645
97
                                : 
ISD::getSetCCOrOperation(CC0, CC1, IsInteger)52
;
4646
97
    if (NewCC != ISD::SETCC_INVALID &&
4647
97
        
(87
!LegalOperations87
||
4648
87
         
(55
TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType())55
&&
4649
55
          
TLI.isOperationLegal(ISD::SETCC, OpVT)0
)))
4650
32
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4651
5.33k
  }
4652
5.33k
4653
5.33k
  return SDValue();
4654
5.33k
}
4655
4656
/// This contains all DAGCombine rules which reduce two values combined by
4657
/// an And operation to a single value. This makes them reusable in the context
4658
/// of visitSELECT(). Rules involving constants are not included as
4659
/// visitSELECT() already handles those cases.
4660
744k
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4661
744k
  EVT VT = N1.getValueType();
4662
744k
  SDLoc DL(N);
4663
744k
4664
744k
  // fold (and x, undef) -> 0
4665
744k
  if (N0.isUndef() || 
N1.isUndef()744k
)
4666
5
    return DAG.getConstant(0, DL, VT);
4667
744k
4668
744k
  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4669
4.23k
    return V;
4670
740k
4671
740k
  if (N0.getOpcode() == ISD::ADD && 
N1.getOpcode() == ISD::SRL30.1k
&&
4672
740k
      
VT.getSizeInBits() <= 64218
) {
4673
218
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4674
218
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4675
17
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4676
17
        // immediate for an add, but it is legal if its top c2 bits are set,
4677
17
        // transform the ADD so the immediate doesn't need to be materialized
4678
17
        // in a register.
4679
17
        APInt ADDC = ADDI->getAPIntValue();
4680
17
        APInt SRLC = SRLI->getAPIntValue();
4681
17
        if (ADDC.getMinSignedBits() <= 64 &&
4682
17
            SRLC.ult(VT.getSizeInBits()) &&
4683
17
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4684
2
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4685
2
                                             SRLC.getZExtValue());
4686
2
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4687
2
            ADDC |= Mask;
4688
2
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4689
2
              SDLoc DL0(N0);
4690
2
              SDValue NewAdd =
4691
2
                DAG.getNode(ISD::ADD, DL0, VT,
4692
2
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4693
2
              CombineTo(N0.getNode(), NewAdd);
4694
2
              // Return N so it doesn't get rechecked!
4695
2
              return SDValue(N, 0);
4696
2
            }
4697
740k
          }
4698
2
        }
4699
17
      }
4700
218
    }
4701
218
  }
4702
740k
4703
740k
  // Reduce bit extract of low half of an integer to the narrower type.
4704
740k
  // (and (srl i64:x, K), KMask) ->
4705
740k
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4706
740k
  if (N0.getOpcode() == ISD::SRL && 
N0.hasOneUse()58.2k
) {
4707
52.1k
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4708
50.1k
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4709
46.7k
        unsigned Size = VT.getSizeInBits();
4710
46.7k
        const APInt &AndMask = CAnd->getAPIntValue();
4711
46.7k
        unsigned ShiftBits = CShift->getZExtValue();
4712
46.7k
4713
46.7k
        // Bail out, this node will probably disappear anyway.
4714
46.7k
        if (ShiftBits == 0)
4715
0
          return SDValue();
4716
46.7k
4717
46.7k
        unsigned MaskBits = AndMask.countTrailingOnes();
4718
46.7k
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4719
46.7k
4720
46.7k
        if (AndMask.isMask() &&
4721
46.7k
            // Required bits must not span the two halves of the integer and
4722
46.7k
            // must fit in the half size type.
4723
46.7k
            
(ShiftBits + MaskBits <= Size / 2)35.8k
&&
4724
46.7k
            
TLI.isNarrowingProfitable(VT, HalfVT)15.3k
&&
4725
46.7k
            
TLI.isTypeDesirableForOp(ISD::AND, HalfVT)356
&&
4726
46.7k
            
TLI.isTypeDesirableForOp(ISD::SRL, HalfVT)355
&&
4727
46.7k
            
TLI.isTruncateFree(VT, HalfVT)355
&&
4728
46.7k
            
TLI.isZExtFree(HalfVT, VT)355
) {
4729
320
          // The isNarrowingProfitable is to avoid regressions on PPC and
4730
320
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
4731
320
          // on downstream users of this. Those patterns could probably be
4732
320
          // extended to handle extensions mixed in.
4733
320
4734
320
          SDValue SL(N0);
4735
320
          assert(MaskBits <= Size);
4736
320
4737
320
          // Extracting the highest bit of the low half.
4738
320
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4739
320
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4740
320
                                      N0.getOperand(0));
4741
320
4742
320
          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4743
320
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4744
320
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4745
320
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4746
320
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4747
320
        }
4748
739k
      }
4749
50.1k
    }
4750
52.1k
  }
4751
739k
4752
739k
  return SDValue();
4753
739k
}
4754
4755
/// Decide whether an AND of the value loaded by LoadN with the constant AndC
/// can be expressed as a zero-extending load. The AND constant has to be a
/// low-bit mask; ExtVT is set to the integer type that is as wide as that
/// mask (it is left untouched when the constant is not a mask). LoadResultTy
/// is the result type used for the extending-load legality queries.
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  const APInt &Mask = AndC->getAPIntValue();
  if (!Mask.isMask())
    return false;

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), Mask.countTrailingOnes());
  const EVT MemVT = LoadN->getMemoryVT();

  // Before operation legalization any ZEXTLOAD may be formed; afterwards the
  // target has to support it for this type pair.
  auto ZExtLoadAllowed = [&] {
    return !LegalOperations ||
           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT);
  };

  // ZEXTLOAD will match without needing to change the size of the value being
  // loaded.
  if (ExtVT == MemVT && ZExtLoadAllowed())
    return true;

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!MemVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (!ZExtLoadAllowed())
    return false;

  // Finally, let the target decide whether narrowing this load is worthwhile.
  return TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT);
}
4791
4792
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4793
                                    ISD::LoadExtType ExtType, EVT &MemVT,
4794
122k
                                    unsigned ShAmt) {
4795
122k
  if (!LDST)
4796
0
    return false;
4797
122k
  // Only allow byte offsets.
4798
122k
  if (ShAmt % 8)
4799
19.3k
    return false;
4800
102k
4801
102k
  // Do not generate loads of non-round integer types since these can
4802
102k
  // be expensive (and would be wrong if the type is not byte sized).
4803
102k
  if (!MemVT.isRound())
4804
34.2k
    return false;
4805
68.6k
4806
68.6k
  // Don't change the width of a volatile load.
4807
68.6k
  if (LDST->isVolatile())
4808
1.19k
    return false;
4809
67.4k
4810
67.4k
  // Verify that we are actually reducing a load width here.
4811
67.4k
  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4812
7.85k
    return false;
4813
59.5k
4814
59.5k
  // Ensure that this isn't going to produce an unsupported unaligned access.
4815
59.5k
  if (ShAmt &&
4816
59.5k
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4817
14.0k
                              LDST->getAddressSpace(), ShAmt / 8,
4818
14.0k
                              LDST->getMemOperand()->getFlags()))
4819
6
    return false;
4820
59.5k
4821
59.5k
  // It's not possible to generate a constant of extended or untyped type.
4822
59.5k
  EVT PtrType = LDST->getBasePtr().getValueType();
4823
59.5k
  if (PtrType == MVT::Untyped || PtrType.isExtended())
4824
0
    return false;
4825
59.5k
4826
59.5k
  if (isa<LoadSDNode>(LDST)) {
4827
59.5k
    LoadSDNode *Load = cast<LoadSDNode>(LDST);
4828
59.5k
    // Don't transform one with multiple uses, this would require adding a new
4829
59.5k
    // load.
4830
59.5k
    if (!SDValue(Load, 0).hasOneUse())
4831
40.9k
      return false;
4832
18.5k
4833
18.5k
    if (LegalOperations &&
4834
18.5k
        
!TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)9.43k
)
4835
8.39k
      return false;
4836
10.1k
4837
10.1k
    // For the transform to be legal, the load must produce only two values
4838
10.1k
    // (the value loaded and the chain).  Don't transform a pre-increment
4839
10.1k
    // load, for example, which produces an extra value.  Otherwise the
4840
10.1k
    // transformation is not equivalent, and the downstream logic to replace
4841
10.1k
    // uses gets things wrong.
4842
10.1k
    if (Load->getNumValues() > 2)
4843
0
      return false;
4844
10.1k
4845
10.1k
    // If the load that we're shrinking is an extload and we're not just
4846
10.1k
    // discarding the extension we can't simply shrink the load. Bail.
4847
10.1k
    // TODO: It would be possible to merge the extensions in some cases.
4848
10.1k
    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4849
10.1k
        
Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt4.72k
)
4850
1
      return false;
4851
10.1k
4852
10.1k
    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4853
2.07k
      return false;
4854
0
  } else {
4855
0
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4856
0
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
4857
0
    // Can't write outside the original store
4858
0
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4859
0
      return false;
4860
0
4861
0
    if (LegalOperations &&
4862
0
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4863
0
      return false;
4864
8.11k
  }
4865
8.11k
  return true;
4866
8.11k
}
4867
4868
bool DAGCombiner::SearchForAndLoads(SDNode *N,
4869
                                    SmallVectorImpl<LoadSDNode*> &Loads,
4870
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4871
                                    ConstantSDNode *Mask,
4872
434k
                                    SDNode *&NodeToMask) {
4873
434k
  // Recursively search for the operands, looking for loads which can be
4874
434k
  // narrowed.
4875
820k
  for (SDValue Op : N->op_values()) {
4876
820k
    if (Op.getValueType().isVector())
4877
0
      return false;
4878
820k
4879
820k
    // Some constants may need fixing up later if they are too large.
4880
820k
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4881
383k
      if ((N->getOpcode() == ISD::OR || 
N->getOpcode() == ISD::XOR383k
) &&
4882
383k
          
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()1.70k
)
4883
1.14k
        NodesWithConsts.insert(N);
4884
383k
      continue;
4885
383k
    }
4886
437k
4887
437k
    if (!Op.hasOneUse())
4888
37.9k
      return false;
4889
399k
4890
399k
    switch(Op.getOpcode()) {
4891
399k
    case ISD::LOAD: {
4892
1.43k
      auto *Load = cast<LoadSDNode>(Op);
4893
1.43k
      EVT ExtVT;
4894
1.43k
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4895
1.43k
          
isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)199
) {
4896
185
4897
185
        // ZEXTLOAD is already small enough.
4898
185
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4899
185
            
ExtVT.bitsGE(Load->getMemoryVT())13
)
4900
9
          continue;
4901
176
4902
176
        // Use LE to convert equal sized loads to zext.
4903
176
        if (ExtVT.bitsLE(Load->getMemoryVT()))
4904
176
          Loads.push_back(Load);
4905
176
4906
176
        continue;
4907
176
      }
4908
1.25k
      return false;
4909
1.25k
    }
4910
26.3k
    case ISD::ZERO_EXTEND:
4911
26.3k
    case ISD::AssertZext: {
4912
26.3k
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4913
26.3k
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4914
26.3k
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
4915
25.6k
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
4916
26.3k
        
Op.getOperand(0).getValueType()710
;
4917
26.3k
4918
26.3k
      // We can accept extending nodes if the mask is wider or an equal
4919
26.3k
      // width to the original type.
4920
26.3k
      if (ExtVT.bitsGE(VT))
4921
25.3k
        continue;
4922
1.00k
      break;
4923
1.00k
    }
4924
8.59k
    case ISD::OR:
4925
8.59k
    case ISD::XOR:
4926
8.59k
    case ISD::AND:
4927
8.59k
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4928
8.59k
                             NodeToMask))
4929
5.98k
        return false;
4930
2.61k
      continue;
4931
363k
    }
4932
363k
4933
363k
    // Allow one node which will masked along with any loads found.
4934
363k
    if (NodeToMask)
4935
1.53k
      return false;
4936
362k
4937
362k
    // Also ensure that the node to be masked only produces one data result.
4938
362k
    NodeToMask = Op.getNode();
4939
362k
    if (NodeToMask->getNumValues() > 1) {
4940
22.8k
      bool HasValue = false;
4941
65.4k
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; 
++i42.5k
) {
4942
46.4k
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4943
46.4k
        if (VT != MVT::Glue && 
VT != MVT::Other45.7k
) {
4944
26.7k
          if (HasValue) {
4945
3.90k
            NodeToMask = nullptr;
4946
3.90k
            return false;
4947
3.90k
          }
4948
22.8k
          HasValue = true;
4949
22.8k
        }
4950
46.4k
      }
4951
22.8k
      assert(HasValue && "Node to be masked has no data result?");
4952
18.9k
    }
4953
362k
  }
4954
434k
  
return true384k
;
4955
434k
}
4956
4957
619k
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4958
619k
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4959
619k
  if (!Mask)
4960
112k
    return false;
4961
506k
4962
506k
  if (!Mask->getAPIntValue().isMask())
4963
56.2k
    return false;
4964
450k
4965
450k
  // No need to do anything if the and directly uses a load.
4966
450k
  if (isa<LoadSDNode>(N->getOperand(0)))
4967
24.3k
    return false;
4968
426k
4969
426k
  SmallVector<LoadSDNode*, 8> Loads;
4970
426k
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4971
426k
  SDNode *FixupNode = nullptr;
4972
426k
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4973
381k
    if (Loads.size() == 0)
4974
381k
      return false;
4975
92
4976
92
    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4977
92
    SDValue MaskOp = N->getOperand(1);
4978
92
4979
92
    // If it exists, fixup the single node we allow in the tree that needs
4980
92
    // masking.
4981
92
    if (FixupNode) {
4982
26
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4983
26
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4984
26
                                FixupNode->getValueType(0),
4985
26
                                SDValue(FixupNode, 0), MaskOp);
4986
26
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4987
26
      if (And.getOpcode() == ISD ::AND)
4988
26
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4989
26
    }
4990
92
4991
92
    // Narrow any constants that need it.
4992
92
    for (auto *LogicN : NodesWithConsts) {
4993
15
      SDValue Op0 = LogicN->getOperand(0);
4994
15
      SDValue Op1 = LogicN->getOperand(1);
4995
15
4996
15
      if (isa<ConstantSDNode>(Op0))
4997
0
          std::swap(Op0, Op1);
4998
15
4999
15
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5000
15
                                Op1, MaskOp);
5001
15
5002
15
      DAG.UpdateNodeOperands(LogicN, Op0, And);
5003
15