Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10
// both before and after the DAG is legalized.
11
//
12
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13
// primarily intended to handle simplification opportunities that are implicit
14
// in the LLVM IR and exposed by the various codegen lowering phases.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/ADT/APFloat.h"
19
#include "llvm/ADT/APInt.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/DenseMap.h"
22
#include "llvm/ADT/IntervalMap.h"
23
#include "llvm/ADT/None.h"
24
#include "llvm/ADT/Optional.h"
25
#include "llvm/ADT/STLExtras.h"
26
#include "llvm/ADT/SetVector.h"
27
#include "llvm/ADT/SmallBitVector.h"
28
#include "llvm/ADT/SmallPtrSet.h"
29
#include "llvm/ADT/SmallSet.h"
30
#include "llvm/ADT/SmallVector.h"
31
#include "llvm/ADT/Statistic.h"
32
#include "llvm/Analysis/AliasAnalysis.h"
33
#include "llvm/Analysis/MemoryLocation.h"
34
#include "llvm/CodeGen/DAGCombine.h"
35
#include "llvm/CodeGen/ISDOpcodes.h"
36
#include "llvm/CodeGen/MachineFrameInfo.h"
37
#include "llvm/CodeGen/MachineFunction.h"
38
#include "llvm/CodeGen/MachineMemOperand.h"
39
#include "llvm/CodeGen/RuntimeLibcalls.h"
40
#include "llvm/CodeGen/SelectionDAG.h"
41
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42
#include "llvm/CodeGen/SelectionDAGNodes.h"
43
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44
#include "llvm/CodeGen/TargetLowering.h"
45
#include "llvm/CodeGen/TargetRegisterInfo.h"
46
#include "llvm/CodeGen/TargetSubtargetInfo.h"
47
#include "llvm/CodeGen/ValueTypes.h"
48
#include "llvm/IR/Attributes.h"
49
#include "llvm/IR/Constant.h"
50
#include "llvm/IR/DataLayout.h"
51
#include "llvm/IR/DerivedTypes.h"
52
#include "llvm/IR/Function.h"
53
#include "llvm/IR/LLVMContext.h"
54
#include "llvm/IR/Metadata.h"
55
#include "llvm/Support/Casting.h"
56
#include "llvm/Support/CodeGen.h"
57
#include "llvm/Support/CommandLine.h"
58
#include "llvm/Support/Compiler.h"
59
#include "llvm/Support/Debug.h"
60
#include "llvm/Support/ErrorHandling.h"
61
#include "llvm/Support/KnownBits.h"
62
#include "llvm/Support/MachineValueType.h"
63
#include "llvm/Support/MathExtras.h"
64
#include "llvm/Support/raw_ostream.h"
65
#include "llvm/Target/TargetMachine.h"
66
#include "llvm/Target/TargetOptions.h"
67
#include <algorithm>
68
#include <cassert>
69
#include <cstdint>
70
#include <functional>
71
#include <iterator>
72
#include <string>
73
#include <tuple>
74
#include <utility>
75
76
using namespace llvm;
77
78
#define DEBUG_TYPE "dagcombine"
79
80
STATISTIC(NodesCombined   , "Number of dag nodes combined");
81
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
84
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85
STATISTIC(SlicedLoads, "Number of load sliced");
86
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87
88
static cl::opt<bool>
89
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));
91
92
static cl::opt<bool>
93
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94
        cl::desc("Enable DAG combiner's use of TBAA"));
95
96
#ifndef NDEBUG
97
static cl::opt<std::string>
98
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99
                   cl::desc("Only use DAG-combiner alias analysis in this"
100
                            " function"));
101
#endif
102
103
/// Hidden option to stress test load slicing, i.e., when this option
104
/// is enabled, load slicing bypasses most of its profitability guards.
105
static cl::opt<bool>
106
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107
                  cl::desc("Bypass the profitability model of load slicing"),
108
                  cl::init(false));
109
110
static cl::opt<bool>
111
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112
                    cl::desc("DAG combiner may split indexing from loads"));
113
114
static cl::opt<unsigned> TokenFactorInlineLimit(
115
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
116
    cl::desc("Limit the number of operands to inline for Token Factors"));
117
118
namespace {
119
120
  class DAGCombiner {
121
    SelectionDAG &DAG;
122
    const TargetLowering &TLI;
123
    CombineLevel Level;
124
    CodeGenOpt::Level OptLevel;
125
    bool LegalOperations = false;
126
    bool LegalTypes = false;
127
    bool ForCodeSize;
128
129
    /// Worklist of all of the nodes that need to be simplified.
130
    ///
131
    /// This must behave as a stack -- new nodes to process are pushed onto the
132
    /// back and when processing we pop off of the back.
133
    ///
134
    /// The worklist will not contain duplicates but may contain null entries
135
    /// due to nodes being deleted from the underlying DAG.
136
    SmallVector<SDNode *, 64> Worklist;
137
138
    /// Mapping from an SDNode to its position on the worklist.
139
    ///
140
    /// This is used to find and remove nodes from the worklist (by nulling
141
    /// them) when they are deleted from the underlying DAG. It relies on
142
    /// stable indices of nodes within the worklist.
143
    DenseMap<SDNode *, unsigned> WorklistMap;
144
    /// This records all nodes attempted to add to the worklist since we
145
    /// considered a new worklist entry. As we keep do not add duplicate nodes
146
    /// in the worklist, this is different from the tail of the worklist.
147
    SmallSetVector<SDNode *, 32> PruningList;
148
149
    /// Set of nodes which have been combined (at least once).
150
    ///
151
    /// This is used to allow us to reliably add any operands of a DAG node
152
    /// which have not yet been combined to the worklist.
153
    SmallPtrSet<SDNode *, 32> CombinedNodes;
154
155
    // AA - Used for DAG load/store alias analysis.
156
    AliasAnalysis *AA;
157
158
    /// When an instruction is simplified, add all users of the instruction to
159
    /// the work lists because they might get more simplified now.
160
3.42M
    void AddUsersToWorklist(SDNode *N) {
161
3.42M
      for (SDNode *Node : N->uses())
162
6.44M
        AddToWorklist(Node);
163
3.42M
    }
164
165
    // Prune potentially dangling nodes. This is called after
166
    // any visit to a node, but should also be called during a visit after any
167
    // failed combine which may have created a DAG node.
168
72.3M
    void clearAddedDanglingWorklistEntries() {
169
72.3M
      // Check any nodes added to the worklist to see if they are prunable.
170
244M
      while (!PruningList.empty()) {
171
172M
        auto *N = PruningList.pop_back_val();
172
172M
        if (N->use_empty())
173
2.37M
          recursivelyDeleteUnusedNodes(N);
174
172M
      }
175
72.3M
    }
176
177
72.3M
    SDNode *getNextWorklistEntry() {
178
72.3M
      // Before we do any work, remove nodes that are not in use.
179
72.3M
      clearAddedDanglingWorklistEntries();
180
72.3M
      SDNode *N = nullptr;
181
72.3M
      // The Worklist holds the SDNodes in order, but it may contain null
182
72.3M
      // entries.
183
147M
      while (!N && 
!Worklist.empty()78.3M
) {
184
75.5M
        N = Worklist.pop_back_val();
185
75.5M
      }
186
72.3M
187
72.3M
      if (N) {
188
69.5M
        bool GoodWorklistEntry = WorklistMap.erase(N);
189
69.5M
        (void)GoodWorklistEntry;
190
69.5M
        assert(GoodWorklistEntry &&
191
69.5M
               "Found a worklist entry without a corresponding map entry!");
192
69.5M
      }
193
72.3M
      return N;
194
72.3M
    }
195
196
    /// Call the node-specific routine that folds each particular type of node.
197
    SDValue visit(SDNode *N);
198
199
  public:
200
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
201
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
202
2.88M
          OptLevel(OL), AA(AA) {
203
2.88M
      ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
204
2.88M
205
2.88M
      MaximumLegalStoreInBits = 0;
206
2.88M
      for (MVT VT : MVT::all_valuetypes())
207
371M
        
if (371M
EVT(VT).isSimple()371M
&& VT != MVT::Other &&
208
371M
            
TLI.isTypeLegal(EVT(VT))368M
&&
209
371M
            
VT.getSizeInBits() >= MaximumLegalStoreInBits51.7M
)
210
32.3M
          MaximumLegalStoreInBits = VT.getSizeInBits();
211
2.88M
    }
212
213
194M
    void ConsiderForPruning(SDNode *N) {
214
194M
      // Mark this for potential pruning.
215
194M
      PruningList.insert(N);
216
194M
    }
217
218
    /// Add to the worklist making sure its instance is at the back (next to be
219
    /// processed.)
220
189M
    void AddToWorklist(SDNode *N) {
221
189M
      assert(N->getOpcode() != ISD::DELETED_NODE &&
222
189M
             "Deleted Node added to Worklist");
223
189M
224
189M
      // Skip handle nodes as they can't usefully be combined and confuse the
225
189M
      // zero-use deletion strategy.
226
189M
      if (N->getOpcode() == ISD::HANDLENODE)
227
22.6k
        return;
228
189M
229
189M
      ConsiderForPruning(N);
230
189M
231
189M
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
232
75.5M
        Worklist.push_back(N);
233
189M
    }
234
235
    /// Remove all instances of N from the worklist.
236
9.38M
    void removeFromWorklist(SDNode *N) {
237
9.38M
      CombinedNodes.erase(N);
238
9.38M
      PruningList.remove(N);
239
9.38M
240
9.38M
      auto It = WorklistMap.find(N);
241
9.38M
      if (It == WorklistMap.end())
242
3.39M
        return; // Not in the worklist.
243
5.98M
244
5.98M
      // Null out the entry rather than erasing it to avoid a linear operation.
245
5.98M
      Worklist[It->second] = nullptr;
246
5.98M
      WorklistMap.erase(It);
247
5.98M
    }
248
249
    void deleteAndRecombine(SDNode *N);
250
    bool recursivelyDeleteUnusedNodes(SDNode *N);
251
252
    /// Replaces all uses of the results of one DAG node with new values.
253
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
254
                      bool AddTo = true);
255
256
    /// Replaces all uses of the results of one DAG node with new values.
257
374k
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
258
374k
      return CombineTo(N, &Res, 1, AddTo);
259
374k
    }
260
261
    /// Replaces all uses of the results of one DAG node with new values.
262
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
263
202k
                      bool AddTo = true) {
264
202k
      SDValue To[] = { Res0, Res1 };
265
202k
      return CombineTo(N, To, 2, AddTo);
266
202k
    }
267
268
    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
269
270
  private:
271
    unsigned MaximumLegalStoreInBits;
272
273
    /// Check the specified integer node value to see if it can be simplified or
274
    /// if things it uses can be simplified by bit propagation.
275
    /// If so, return true.
276
5.67M
    bool SimplifyDemandedBits(SDValue Op) {
277
5.67M
      unsigned BitWidth = Op.getScalarValueSizeInBits();
278
5.67M
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
279
5.67M
      return SimplifyDemandedBits(Op, DemandedBits);
280
5.67M
    }
281
282
5.97M
    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
283
5.97M
      EVT VT = Op.getValueType();
284
5.97M
      unsigned NumElts = VT.isVector() ? 
VT.getVectorNumElements()341k
:
15.63M
;
285
5.97M
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
286
5.97M
      return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
287
5.97M
    }
288
289
    /// Check the specified vector node value to see if it can be simplified or
290
    /// if things it uses can be simplified as it only uses some of the
291
    /// elements. If so, return true.
292
329k
    bool SimplifyDemandedVectorElts(SDValue Op) {
293
329k
      unsigned NumElts = Op.getValueType().getVectorNumElements();
294
329k
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
295
329k
      return SimplifyDemandedVectorElts(Op, DemandedElts);
296
329k
    }
297
298
    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
299
                              const APInt &DemandedElts);
300
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
301
                                    bool AssumeSingleUse = false);
302
303
    bool CombineToPreIndexedLoadStore(SDNode *N);
304
    bool CombineToPostIndexedLoadStore(SDNode *N);
305
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
306
    bool SliceUpLoad(SDNode *N);
307
308
    // Scalars have size 0 to distinguish from singleton vectors.
309
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
310
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
311
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
312
313
    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
314
    ///   load.
315
    ///
316
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
317
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
318
    /// \param EltNo index of the vector element to load.
319
    /// \param OriginalLoad load that EVE came from to be replaced.
320
    /// \returns EVE on success SDValue() on failure.
321
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
322
                                         SDValue EltNo,
323
                                         LoadSDNode *OriginalLoad);
324
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
325
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
326
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
327
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
328
    SDValue PromoteIntBinOp(SDValue Op);
329
    SDValue PromoteIntShiftOp(SDValue Op);
330
    SDValue PromoteExtend(SDValue Op);
331
    bool PromoteLoad(SDValue Op);
332
333
    /// Call the node-specific routine that knows how to fold each
334
    /// particular type of node. If that doesn't do anything, try the
335
    /// target-specific DAG combines.
336
    SDValue combine(SDNode *N);
337
338
    // Visitation implementation - Implement dag node combining for different
339
    // node types.  The semantics are as follows:
340
    // Return Value:
341
    //   SDValue.getNode() == 0 - No change was made
342
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
343
    //   otherwise              - N should be replaced by the returned Operand.
344
    //
345
    SDValue visitTokenFactor(SDNode *N);
346
    SDValue visitMERGE_VALUES(SDNode *N);
347
    SDValue visitADD(SDNode *N);
348
    SDValue visitADDLike(SDNode *N);
349
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
350
    SDValue visitSUB(SDNode *N);
351
    SDValue visitADDSAT(SDNode *N);
352
    SDValue visitSUBSAT(SDNode *N);
353
    SDValue visitADDC(SDNode *N);
354
    SDValue visitADDO(SDNode *N);
355
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
356
    SDValue visitSUBC(SDNode *N);
357
    SDValue visitSUBO(SDNode *N);
358
    SDValue visitADDE(SDNode *N);
359
    SDValue visitADDCARRY(SDNode *N);
360
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
361
    SDValue visitSUBE(SDNode *N);
362
    SDValue visitSUBCARRY(SDNode *N);
363
    SDValue visitMUL(SDNode *N);
364
    SDValue useDivRem(SDNode *N);
365
    SDValue visitSDIV(SDNode *N);
366
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
367
    SDValue visitUDIV(SDNode *N);
368
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
369
    SDValue visitREM(SDNode *N);
370
    SDValue visitMULHU(SDNode *N);
371
    SDValue visitMULHS(SDNode *N);
372
    SDValue visitSMUL_LOHI(SDNode *N);
373
    SDValue visitUMUL_LOHI(SDNode *N);
374
    SDValue visitMULO(SDNode *N);
375
    SDValue visitIMINMAX(SDNode *N);
376
    SDValue visitAND(SDNode *N);
377
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
378
    SDValue visitOR(SDNode *N);
379
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
380
    SDValue visitXOR(SDNode *N);
381
    SDValue SimplifyVBinOp(SDNode *N);
382
    SDValue visitSHL(SDNode *N);
383
    SDValue visitSRA(SDNode *N);
384
    SDValue visitSRL(SDNode *N);
385
    SDValue visitFunnelShift(SDNode *N);
386
    SDValue visitRotate(SDNode *N);
387
    SDValue visitABS(SDNode *N);
388
    SDValue visitBSWAP(SDNode *N);
389
    SDValue visitBITREVERSE(SDNode *N);
390
    SDValue visitCTLZ(SDNode *N);
391
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
392
    SDValue visitCTTZ(SDNode *N);
393
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
394
    SDValue visitCTPOP(SDNode *N);
395
    SDValue visitSELECT(SDNode *N);
396
    SDValue visitVSELECT(SDNode *N);
397
    SDValue visitSELECT_CC(SDNode *N);
398
    SDValue visitSETCC(SDNode *N);
399
    SDValue visitSETCCCARRY(SDNode *N);
400
    SDValue visitSIGN_EXTEND(SDNode *N);
401
    SDValue visitZERO_EXTEND(SDNode *N);
402
    SDValue visitANY_EXTEND(SDNode *N);
403
    SDValue visitAssertExt(SDNode *N);
404
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
405
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
406
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
407
    SDValue visitTRUNCATE(SDNode *N);
408
    SDValue visitBITCAST(SDNode *N);
409
    SDValue visitBUILD_PAIR(SDNode *N);
410
    SDValue visitFADD(SDNode *N);
411
    SDValue visitFSUB(SDNode *N);
412
    SDValue visitFMUL(SDNode *N);
413
    SDValue visitFMA(SDNode *N);
414
    SDValue visitFDIV(SDNode *N);
415
    SDValue visitFREM(SDNode *N);
416
    SDValue visitFSQRT(SDNode *N);
417
    SDValue visitFCOPYSIGN(SDNode *N);
418
    SDValue visitFPOW(SDNode *N);
419
    SDValue visitSINT_TO_FP(SDNode *N);
420
    SDValue visitUINT_TO_FP(SDNode *N);
421
    SDValue visitFP_TO_SINT(SDNode *N);
422
    SDValue visitFP_TO_UINT(SDNode *N);
423
    SDValue visitFP_ROUND(SDNode *N);
424
    SDValue visitFP_ROUND_INREG(SDNode *N);
425
    SDValue visitFP_EXTEND(SDNode *N);
426
    SDValue visitFNEG(SDNode *N);
427
    SDValue visitFABS(SDNode *N);
428
    SDValue visitFCEIL(SDNode *N);
429
    SDValue visitFTRUNC(SDNode *N);
430
    SDValue visitFFLOOR(SDNode *N);
431
    SDValue visitFMINNUM(SDNode *N);
432
    SDValue visitFMAXNUM(SDNode *N);
433
    SDValue visitFMINIMUM(SDNode *N);
434
    SDValue visitFMAXIMUM(SDNode *N);
435
    SDValue visitBRCOND(SDNode *N);
436
    SDValue visitBR_CC(SDNode *N);
437
    SDValue visitLOAD(SDNode *N);
438
439
    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
440
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
441
442
    SDValue visitSTORE(SDNode *N);
443
    SDValue visitLIFETIME_END(SDNode *N);
444
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
445
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
446
    SDValue visitBUILD_VECTOR(SDNode *N);
447
    SDValue visitCONCAT_VECTORS(SDNode *N);
448
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
449
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
450
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
451
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
452
    SDValue visitMLOAD(SDNode *N);
453
    SDValue visitMSTORE(SDNode *N);
454
    SDValue visitMGATHER(SDNode *N);
455
    SDValue visitMSCATTER(SDNode *N);
456
    SDValue visitFP_TO_FP16(SDNode *N);
457
    SDValue visitFP16_TO_FP(SDNode *N);
458
    SDValue visitVECREDUCE(SDNode *N);
459
460
    SDValue visitFADDForFMACombine(SDNode *N);
461
    SDValue visitFSUBForFMACombine(SDNode *N);
462
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);
463
464
    SDValue XformToShuffleWithZero(SDNode *N);
465
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
466
                                                    const SDLoc &DL, SDValue N0,
467
                                                    SDValue N1);
468
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
469
                                      SDValue N1);
470
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
471
                           SDValue N1, SDNodeFlags Flags);
472
473
    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
474
475
    SDValue foldSelectOfConstants(SDNode *N);
476
    SDValue foldVSelectOfConstants(SDNode *N);
477
    SDValue foldBinOpIntoSelect(SDNode *BO);
478
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
479
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
480
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
481
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
482
                             SDValue N2, SDValue N3, ISD::CondCode CC,
483
                             bool NotExtCompare = false);
484
    SDValue convertSelectOfFPConstantsToLoadOffset(
485
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
486
        ISD::CondCode CC);
487
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
488
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
489
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
490
                              const SDLoc &DL);
491
    SDValue unfoldMaskedMerge(SDNode *N);
492
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
493
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
494
                          const SDLoc &DL, bool foldBooleans);
495
    SDValue rebuildSetCC(SDValue N);
496
497
    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
498
                           SDValue &CC) const;
499
    bool isOneUseSetCC(SDValue N) const;
500
501
    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
502
                                         unsigned HiOp);
503
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
504
    SDValue CombineExtLoad(SDNode *N);
505
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
506
    SDValue combineRepeatedFPDivisors(SDNode *N);
507
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
508
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
509
    SDValue BuildSDIV(SDNode *N);
510
    SDValue BuildSDIVPow2(SDNode *N);
511
    SDValue BuildUDIV(SDNode *N);
512
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
513
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
514
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
515
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
516
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
517
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
518
                                SDNodeFlags Flags, bool Reciprocal);
519
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
520
                                SDNodeFlags Flags, bool Reciprocal);
521
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
522
                               bool DemandHighBits = true);
523
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
524
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
525
                              SDValue InnerPos, SDValue InnerNeg,
526
                              unsigned PosOpcode, unsigned NegOpcode,
527
                              const SDLoc &DL);
528
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
529
    SDValue MatchLoadCombine(SDNode *N);
530
    SDValue MatchStoreCombine(StoreSDNode *N);
531
    SDValue ReduceLoadWidth(SDNode *N);
532
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
533
    SDValue splitMergedValStore(StoreSDNode *ST);
534
    SDValue TransformFPLoadStorePair(SDNode *N);
535
    SDValue convertBuildVecZextToZext(SDNode *N);
536
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
537
    SDValue reduceBuildVecToShuffle(SDNode *N);
538
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
539
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
540
                                  SDValue VecIn2, unsigned LeftIdx,
541
                                  bool DidSplitVec);
542
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
543
544
    /// Walk up chain skipping non-aliasing memory nodes,
545
    /// looking for aliasing nodes and adding them to the Aliases vector.
546
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
547
                          SmallVectorImpl<SDValue> &Aliases);
548
549
    /// Return true if there is any possibility that the two addresses overlap.
550
    bool isAlias(SDNode *Op0, SDNode *Op1) const;
551
552
    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
553
    /// chain (aliasing node.)
554
    SDValue FindBetterChain(SDNode *N, SDValue Chain);
555
556
    /// Try to replace a store and any possibly adjacent stores on
557
    /// consecutive chains with better chains. Return true only if St is
558
    /// replaced.
559
    ///
560
    /// Notice that other chains may still be replaced even if the function
561
    /// returns false.
562
    bool findBetterNeighborChains(StoreSDNode *St);
563
564
    // Helper for findBetterNeighborChains. Walk up store chain add additional
565
    // chained stores that do not overlap and can be parallelized.
566
    bool parallelizeChainedStores(StoreSDNode *St);
567
568
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
569
    /// is located in a sequence of memory operations connected by a chain.
570
    struct MemOpLink {
571
      // Ptr to the mem node.
572
      LSBaseSDNode *MemNode;
573
574
      // Offset from the base ptr.
575
      int64_t OffsetFromBase;
576
577
      MemOpLink(LSBaseSDNode *N, int64_t Offset)
578
1.85M
          : MemNode(N), OffsetFromBase(Offset) {}
579
    };
580
581
    /// This is a helper function for visitMUL to check the profitability
582
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
583
    /// MulNode is the original multiply, AddNode is (add x, c1),
584
    /// and ConstNode is c2.
585
    bool isMulAddWithConstProfitable(SDNode *MulNode,
586
                                     SDValue &AddNode,
587
                                     SDValue &ConstNode);
588
589
    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
590
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
591
    /// the type of the loaded value to be extended.
592
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
593
                          EVT LoadResultTy, EVT &ExtVT);
594
595
    /// Helper function to calculate whether the given Load/Store can have its
596
    /// width reduced to ExtVT.
597
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
598
                           EVT &MemVT, unsigned ShAmt = 0);
599
600
    /// Used by BackwardsPropagateMask to find suitable loads.
601
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
602
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
603
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
604
    /// Attempt to propagate a given AND node back to load leaves so that they
605
    /// can be combined into narrow loads.
606
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
607
608
    /// Helper function for MergeConsecutiveStores which merges the
609
    /// component store chains.
610
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
611
                                unsigned NumStores);
612
613
    /// This is a helper function for MergeConsecutiveStores. When the
614
    /// source elements of the consecutive stores are all constants or
615
    /// all extracted vector elements, try to merge them into one
616
    /// larger store introducing bitcasts if necessary.  \return True
617
    /// if a merged store was created.
618
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
619
                                         EVT MemVT, unsigned NumStores,
620
                                         bool IsConstantSrc, bool UseVector,
621
                                         bool UseTrunc);
622
623
    /// This is a helper function for MergeConsecutiveStores. Stores
624
    /// that potentially may be merged with St are placed in
625
    /// StoreNodes. RootNode is a chain predecessor to all store
626
    /// candidates.
627
    void getStoreMergeCandidates(StoreSDNode *St,
628
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
629
                                 SDNode *&Root);
630
631
    /// Helper function for MergeConsecutiveStores. Checks if
632
    /// candidate stores have indirect dependency through their
633
    /// operands. RootNode is the predecessor to all stores calculated
634
    /// by getStoreMergeCandidates and is used to prune the dependency check.
635
    /// \return True if safe to merge.
636
    bool checkMergeStoreCandidatesForDependencies(
637
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
638
        SDNode *RootNode);
639
640
    /// Merge consecutive store operations into a wide store.
641
    /// This optimization uses wide integers or vectors when possible.
642
    /// \return number of stores that were merged into a merged store (the
643
    /// affected nodes are stored as a prefix in \p StoreNodes).
644
    bool MergeConsecutiveStores(StoreSDNode *St);
645
646
    /// Try to transform a truncation where C is a constant:
647
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
648
    ///
649
    /// \p N needs to be a truncation and its first operand an AND. Other
650
    /// requirements are checked by the function (e.g. that trunc is
651
    /// single-use) and if missed an empty SDValue is returned.
652
    SDValue distributeTruncateThroughAnd(SDNode *N);
653
654
    /// Helper function to determine whether the target supports operation
655
    /// given by \p Opcode for type \p VT, that is, whether the operation
656
    /// is legal or custom before legalizing operations, and whether is
657
    /// legal (but not custom) after legalization.
658
613k
    bool hasOperation(unsigned Opcode, EVT VT) {
659
613k
      if (LegalOperations)
660
366k
        return TLI.isOperationLegal(Opcode, VT);
661
246k
      return TLI.isOperationLegalOrCustom(Opcode, VT);
662
246k
    }
663
664
  public:
665
    /// Runs the dag combiner on all nodes in the work list
666
    void Run(CombineLevel AtLevel);
667
668
73.4M
    SelectionDAG &getDAG() const { return DAG; }
669
670
    /// Returns a type large enough to hold any valid shift amount - before type
671
    /// legalization these can be huge.
672
12.8k
    EVT getShiftAmountTy(EVT LHSTy) {
673
12.8k
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
674
12.8k
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
675
12.8k
    }
676
677
    /// This method returns true if we are running before type legalization or
678
    /// if the specified VT is legal.
679
560k
    bool isTypeLegal(const EVT &VT) {
680
560k
      if (!LegalTypes) 
return true149k
;
681
411k
      return TLI.isTypeLegal(VT);
682
411k
    }
683
684
    /// Convenience wrapper around TargetLowering::getSetCCResultType
685
739k
    EVT getSetCCResultType(EVT VT) const {
686
739k
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
687
739k
    }
688
689
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
690
                         SDValue OrigLoad, SDValue ExtLoad,
691
                         ISD::NodeType ExtType);
692
  };
693
694
/// This class is a DAGUpdateListener that removes any deleted
695
/// nodes from the worklist.
696
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC; // Combiner whose worklist is kept in sync with the DAG.

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // Invoked by the DAG whenever a node is deleted; drop it from the
  // combiner's worklist so a dangling node is never visited.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
707
708
/// DAGUpdateListener that tells the combiner about newly created nodes so
/// they can be considered for pruning.
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC; // Combiner to notify about insertions.

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
719
720
} // end anonymous namespace
721
722
//===----------------------------------------------------------------------===//
723
//  TargetLowering::DAGCombinerInfo implementation
724
//===----------------------------------------------------------------------===//
725
726
15.8k
// Forward to the combiner hidden behind the opaque DC pointer.
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  static_cast<DAGCombiner *>(DC)->AddToWorklist(N);
}
729
730
// Forward a multi-result replacement to the underlying combiner.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  auto *Combiner = static_cast<DAGCombiner *>(DC);
  return Combiner->CombineTo(N, To.data(), To.size(), AddTo);
}
734
735
// Forward a single-result replacement to the underlying combiner.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res, AddTo);
}
739
740
// Forward a two-result replacement to the underlying combiner.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res0, Res1, AddTo);
}
744
745
void TargetLowering::DAGCombinerInfo::
746
4.01k
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
747
4.01k
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
748
4.01k
}
749
750
//===----------------------------------------------------------------------===//
751
// Helper Functions
752
//===----------------------------------------------------------------------===//
753
754
1.08M
void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // Operands may become dead once N goes away. Queue any operand that is
  // either used only by N, or that produces multiple values — one of those
  // values may now be dead, enabling further simplification (e.g. split
  // index arithmetic from an indexed load).
  for (const SDValue &Op : N->ops()) {
    SDNode *OpNode = Op.getNode();
    if (OpNode->hasOneUse() || OpNode->getNumValues() > 1)
      AddToWorklist(OpNode);
  }

  DAG.DeleteNode(N);
}
768
769
/// Return 1 if we can compute the negated form of the specified expression for
770
/// the same cost as the expression itself, or 2 if we can compute the negated
771
/// form more cheaply than the expression itself.
772
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
773
                               const TargetLowering &TLI,
774
                               const TargetOptions *Options,
775
                               bool ForCodeSize,
776
448k
                               unsigned Depth = 0) {
777
448k
  // fneg is removable even if it has multiple uses.
778
448k
  if (Op.getOpcode() == ISD::FNEG)
779
1.01k
    return 2;
780
447k
781
447k
  // Don't allow anything with multiple uses unless we know it is free.
782
447k
  EVT VT = Op.getValueType();
783
447k
  const SDNodeFlags Flags = Op->getFlags();
784
447k
  if (!Op.hasOneUse() &&
785
447k
      
!(170k
Op.getOpcode() == ISD::FP_EXTEND170k
&&
786
170k
        
TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())591
))
787
170k
    return 0;
788
276k
789
276k
  // Don't recurse exponentially.
790
276k
  if (Depth > 6)
791
2.99k
    return 0;
792
273k
793
273k
  switch (Op.getOpcode()) {
794
273k
  
default: return false156k
;
795
273k
  case ISD::ConstantFP: {
796
13.6k
    if (!LegalOperations)
797
10.5k
      return 1;
798
3.03k
799
3.03k
    // Don't invert constant FP values after legalization unless the target says
800
3.03k
    // the negated constant is legal.
801
3.03k
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
802
3.03k
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
803
2.12k
                            ForCodeSize);
804
3.03k
  }
805
3.03k
  case ISD::BUILD_VECTOR: {
806
2.93k
    // Only permit BUILD_VECTOR of constants.
807
10.5k
    if (
llvm::any_of(Op->op_values(), [&](SDValue N) 2.93k
{
808
10.5k
          return !N.isUndef() && 
!isa<ConstantFPSDNode>(N)10.3k
;
809
10.5k
        }))
810
1.07k
      return 0;
811
1.86k
    if (!LegalOperations)
812
1.57k
      return 1;
813
287
    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
814
287
        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
815
0
      return 1;
816
287
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
817
287
      return N.isUndef() ||
818
287
             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
819
287
                              ForCodeSize);
820
287
    });
821
287
  }
822
28.9k
  case ISD::FADD:
823
28.9k
    if (!Options->UnsafeFPMath && 
!Flags.hasNoSignedZeros()27.5k
)
824
27.2k
      return 0;
825
1.75k
826
1.75k
    // After operation legalization, it might not be legal to create new FSUBs.
827
1.75k
    if (LegalOperations && 
!TLI.isOperationLegalOrCustom(ISD::FSUB, VT)670
)
828
0
      return 0;
829
1.75k
830
1.75k
    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
831
1.75k
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
832
22
                                    Options, ForCodeSize, Depth + 1))
833
22
      return V;
834
1.73k
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
835
1.73k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
836
1.73k
                              ForCodeSize, Depth + 1);
837
4.78k
  case ISD::FSUB:
838
4.78k
    // We can't turn -(A-B) into B-A when we honor signed zeros.
839
4.78k
    if (!Options->NoSignedZerosFPMath && 
!Flags.hasNoSignedZeros()4.74k
)
840
3.94k
      return 0;
841
841
842
841
    // fold (fneg (fsub A, B)) -> (fsub B, A)
843
841
    return 1;
844
841
845
63.3k
  case ISD::FMUL:
846
63.3k
  case ISD::FDIV:
847
63.3k
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
848
63.3k
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
849
770
                                    Options, ForCodeSize, Depth + 1))
850
770
      return V;
851
62.5k
852
62.5k
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
853
62.5k
                              ForCodeSize, Depth + 1);
854
62.5k
855
62.5k
  case ISD::FP_EXTEND:
856
4.16k
  case ISD::FP_ROUND:
857
4.16k
  case ISD::FSIN:
858
4.16k
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
859
4.16k
                              ForCodeSize, Depth + 1);
860
273k
  }
861
273k
}
862
863
/// If isNegatibleForFree returns true, return the newly negated expression.
864
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  // NOTE: the case structure here must mirror isNegatibleForFree — a case
  // reachable here that isNegatibleForFree rejects hits llvm_unreachable.

  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = Op->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the immediate directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::BUILD_VECTOR: {
    // Negate each constant element; undef elements pass through unchanged.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
    }
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP =
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with the unary op: push the fneg into the operand.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1));
  case ISD::FP_ROUND:
    // FP_ROUND carries a truncation flag as operand 1 that must be preserved.
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(1));
  }
}
957
958
// APInts must be the same size for most operations, this helper
959
// function zero extends the shorter of the pair so that they match.
960
// We provide an Offset so that we can create bitwidths that won't overflow.
961
10.9k
// Zero-extend whichever of the two APInts is narrower so both end up at the
// same width; Offset adds headroom so subsequent arithmetic can't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  const unsigned Width =
      Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Width);
  RHS = RHS.zextOrSelf(Width);
}
966
967
// Return true if this node is a setcc, or is a select_cc
968
// that selects between the target values used for true and false, making it
969
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
970
// the appropriate nodes based on the type of node we are checking. This
971
// simplifies life a bit for the callers.
972
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  // Plain SETCC: operands are (lhs, rhs, condcode).
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  // Otherwise only a SELECT_CC that selects the canonical true/false values
  // behaves like a setcc.
  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  // With undefined boolean contents we can't rely on the selected values.
  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  // SELECT_CC: operands are (lhs, rhs, true-val, false-val, condcode).
  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
995
996
/// Return true if this is a SetCC-equivalent operation with only one use.
997
/// If this is true, it allows the users to invert the operation for free when
998
/// it is profitable to do so.
999
5.77k
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  // A setcc-equivalent node with a single use can be inverted for free.
  SDValue L, R, CC;
  return isSetCCEquivalent(N, L, R, CC) && N.getNode()->hasOneUse();
}
1005
1006
// Returns the SDNode if it is a constant float BuildVector
1007
// or constant float.
1008
325k
// Returns the SDNode if it is a constant float BuildVector or a constant
// float scalar; otherwise nullptr.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  SDNode *Node = N.getNode();
  if (isa<ConstantFPSDNode>(N) ||
      ISD::isBuildVectorOfConstantFPSDNodes(Node))
    return Node;
  return nullptr;
}
1015
1016
// Determines if it is a constant integer or a build vector of constant
1017
// integers (and undefs).
1018
// Do not permit build vector implicit truncation.
1019
4.45M
// Determines if it is a constant integer or a build vector of constant
// integers (and undefs). Build-vector elements must match the scalar width,
// i.e. implicit truncation is not permitted.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (auto *Const = dyn_cast<ConstantSDNode>(N))
    return !(NoOpaques && Const->isOpaque());

  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;

  unsigned BitWidth = N.getScalarValueSizeInBits();
  return llvm::all_of(N->op_values(), [&](const SDValue &Op) {
    if (Op.isUndef())
      return true;
    auto *Const = dyn_cast<ConstantSDNode>(Op);
    return Const && Const->getAPIntValue().getBitWidth() == BitWidth &&
           !(NoOpaques && Const->isOpaque());
  });
}
1035
1036
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1037
// undef's.
1038
58.7k
// Determines if a BUILD_VECTOR is composed of all-constants (integer or FP)
// possibly mixed with undef's.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  if (isConstantOrConstantVector(V, NoOpaques))
    return true;
  return ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
1044
1045
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  // With a single use there is no other memory op sharing the split offset,
  // so folding the constants can't pessimize anything.
  if (N0.hasOneUse())
    return false;

  // Both offsets must be constants for the combined-offset check below.
  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  // getSExtValue below requires the offsets to fit in 64 bits.
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
1100
1101
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1102
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1103
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
// Returns the reassociated value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())
    return SDValue();

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}
1133
1134
// Try to reassociate commutative binops.
1135
// Try to reassociate commutative binops: attempts the fold with the operands
// in both orders via reassociateOpsCommutative.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}
1154
1155
// Replace all NumTo results of \p N with the values in \p To; optionally
// (AddTo) requeue the replacements and their users, then delete N if dead.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The listener keeps the worklist consistent while RAUW deletes nodes.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1186
1187
// Apply a replacement recorded in a TargetLoweringOpt (Old -> New), keeping
// the worklist in sync and cleaning up the old node if it became dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1204
1205
/// Check the specified integer node value to see if it can be simplified or if
1206
/// things it uses can be simplified by bit propagation. If so, return true.
1207
// Check the specified integer node value to see if it can be simplified or if
// things it uses can be simplified by bit propagation. Returns true (and
// commits the replacement) if a simplification was made.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1226
1227
/// Check the specified vector node value to see if it can be simplified or
1228
/// if things it uses can be simplified as it only uses some of the elements.
1229
/// If so, return true.
1230
// Check the specified vector node value to see if it can be simplified or
// if things it uses can be simplified as it only uses some of the elements.
// Returns true (and commits the replacement) if a simplification was made.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1251
1252
349
// Replace \p Load with \p ExtLoad: users of the loaded value get a TRUNCATE
// of the wider load's result, users of the chain get the new load's chain.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);   // value result
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); // chain
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1265
1266
6.38k
// Produce a PVT-typed version of \p Op for integer promotion. Sets \p Replace
// when a load was rebuilt and the original must later be replaced via
// ReplaceLoadWithPromotedLoad. Returns an empty SDValue on failure.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A plain load becomes an any-extending load; an ext-load keeps its kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value sign-extended, preserving the assertion.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    // Promote the asserted value zero-extended, preserving the assertion.
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Everything else is promoted with a plain any-extend if the target has it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1302
1303
37
// Promote \p Op to \p PVT and re-establish its signed value range with a
// SIGN_EXTEND_INREG of the old type. Empty SDValue on failure.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1319
1320
1.02k
// Promote \p Op to \p PVT and re-establish its unsigned value range by
// masking back down to the old type's width. Empty SDValue on failure.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1333
1334
/// Promote the specified integer binary operation if the target indicates it is
1335
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1336
/// i32 since i16 instructions are longer.
1337
4.52M
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the op at the wider type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1398
1399
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion is only attempted once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  // Only scalar integer shifts are candidates for promotion.
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to. PVT is updated by the target.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The value operand must be extended in a way that preserves the shift's
    // semantics: sign-extend for arithmetic right shifts, zero-extend for
    // logical right shifts, and any extension for left shifts.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Build the promoted shift and truncate the result back to VT.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1451
1452
312k
/// Promote the specified integer extension (sext/zext/aext) if the target
/// indicates the extension's result type is undesirable and promotion is
/// profitable. The fold simply re-emits the same extension at the wider type.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  // Promotion is only attempted once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  // Only scalar integer extensions are candidates.
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to. PVT is updated by the target.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}
1479
1480
2.69M
/// Promote an unindexed integer load to a wider type if the target indicates
/// it is beneficial. The load is rebuilt as an extending load of the promoted
/// type and truncated back; both the value and chain results are replaced.
/// Returns true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  // Promotion is only attempted once operations have been legalized.
  if (!LegalOperations)
    return false;

  // Indexed loads update a pointer operand as a side effect; only plain
  // unindexed loads are handled here.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  // Only scalar integer loads are candidates.
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to. PVT is updated by the target.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an any-extending load of the same memory
    // type; an extending load keeps its original extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Replace the value result with the truncated wide load and the chain
    // result with the new load's chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1525
1526
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
///
/// \returns true if \p N was dead and has been deleted, false otherwise.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Worklist of nodes to inspect; a SetVector dedupes repeated operands.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Deleting N may make its operands dead too, so queue them.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live: one of its users was deleted, so it may now combine
      // differently — revisit it.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1555
1556
//===----------------------------------------------------------------------===//
1557
//  Main DAG Combiner implementation
1558
//===----------------------------------------------------------------------===//
1559
1560
2.88M
/// Main driver of the DAG combiner: seeds the worklist with every node in the
/// DAG, then repeatedly pops nodes, deletes dead ones, (re-)legalizes when
/// running post-legalization, and applies combine() until the worklist is
/// empty. Finally updates the DAG root and sweeps remaining dead nodes.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // Null result: no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Transfer all of N's uses to the replacement node/value.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1654
1655
69.5M
/// Dispatch a node to its opcode-specific visit routine. Returns the combined
/// replacement value, or a null SDValue when the opcode has no visitor here
/// (target-specific combines may still apply in combine()).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  return SDValue();
}
1785
1786
69.5M
/// Try every combine strategy on \p N in order: the generic opcode visitor,
/// target-specific combines, integer-type promotion, and finally commuted-CSE
/// elimination. Returns the replacement value or null if nothing applied.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target-specific opcodes always qualify; generic opcodes only if the
    // target registered a combine for them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place; signal "combined" by returning N
      // itself so Run() treats it as a CombineTo-style update.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1853
1854
/// Given a node, return its input chain if it has one, otherwise return a null
1855
/// sd operand.
1856
2.12M
static SDValue getInputChainForNode(SDNode *N) {
1857
2.12M
  if (unsigned NumOps = N->getNumOperands()) {
1858
2.10M
    if (N->getOperand(0).getValueType() == MVT::Other)
1859
2.04M
      return N->getOperand(0);
1860
60.0k
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1861
60.0k
      return N->getOperand(NumOps-1);
1862
0
    for (unsigned i = 1; i < NumOps-1; ++i)
1863
0
      if (N->getOperand(i).getValueType() == MVT::Other)
1864
0
        return N->getOperand(i);
1865
0
  }
1866
2.12M
  
return SDValue()25.6k
;
1867
2.12M
}
1868
1869
1.87M
/// Simplify a TokenFactor node: drop redundant chains, inline single-use
/// TokenFactor operands (up to an inline limit), and prune operands whose
/// chains are already reached transitively through another operand.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Helper: record that chain node Op was reached while searching on behalf of
  // operand OpNumber; prunes Op if it is itself one of the TokenFactor's
  // operands (it is then reachable through another operand and redundant).
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Breadth-first walk up the chains, capped at 1024 visited nodes to bound
  // compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands whose chains were not reached via another operand.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
2052
2053
/// MERGE_VALUES can always be eliminated: each of its results is replaced
/// directly by the corresponding operand.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
2072
2073
/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2074
/// ConstantSDNode pointer else nullptr.
2075
2.72M
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2076
2.72M
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2077
2.72M
  return Const != nullptr && 
!Const->isOpaque()353k
?
Const351k
:
nullptr2.37M
;
2078
2.72M
}
2079
2080
6.75M
/// Try to pull a binary operator with a constant operand through a one-use
/// select whose true/false arms are constants, folding the binop into each
/// arm: binop (select Cond, CT, CF), CBO --> select Cond, CT*CBO, CF*CBO.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    // Operand 0 is not a one-use select; try operand 1 instead.
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be constants (integer or FP, scalar or vector).
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // The other binop operand; must itself be constant unless the
  // and/or-with-0/-1 special case applies.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  // The fold must actually have produced a constant (or undef), otherwise we
  // would be trading a binop for a binop-plus-select.
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  // Preserve the original binop's fast-math/nowrap flags on the new select.
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}
2157
2158
3.68M
/// Fold add/sub of a zext'd (seteq (X & 1), 0) with a constant into a
/// sub/add of (zext (X & 1)) with an adjusted constant, removing the compare.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
2196
2197
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  // TODO - support non-uniform vector amounts.
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  // For the add form an arithmetic shift is used; for the sub form the shift
  // stays logical.
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
2234
2235
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
///
/// NOTE(review): the folds below are attempted in a fixed order; the first one
/// that matches wins, so reordering them can change which combine fires.
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
      SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
                                               N0.getOperand(1).getNode());
      assert(Sub && "Constant folding failed");
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
    }

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
                                               N0.getOperand(0).getNode());
      assert(Add && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;
  }
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    // Matches element-wise: the umax constant must be the negation of the
    // add constant (undef lanes are allowed).
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD ||
        N0.getOpcode() == ISD::UADDO ||
        N0.getOpcode() == ISD::SADDO) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
        N0.getOpcode() == ISD::ADD) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Try the commutative folds with the operands in both orders.
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}
2455
2456
4.07M
/// Combine an ISD::ADD node: generic add-like folds first, then the
/// masked-bool and shifted-sign-bit patterns, then add -> or when the
/// operands provably share no set bits.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  return SDValue();
}
2478
2479
3.23k
/// Combine a saturating add node (SADDSAT/UADDSAT): identity/undef/constant
/// folds, plus lowering to a plain ADD when overflow is provably impossible.
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  // NOTE(review): only done for the unsigned flavor here; computeOverflowKind
  // is queried for unsigned overflow.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
2521
2522
2.53M
/// If V (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-1 nodes inserted by
/// legalization) is the carry result of a carry-producing node that is legal
/// or custom for the target, return that carry value; otherwise return an
/// empty SDValue.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  // Peel wrapper nodes, remembering whether the value was explicitly masked
  // down to its low bit.
  bool Masked = false;
  for (bool Peeling = true; Peeling;) {
    unsigned Opc = V.getOpcode();
    if (Opc == ISD::TRUNCATE || Opc == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
    } else if (Opc == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
    } else {
      Peeling = false;
    }
  }

  // Carry is always result #1 of the producing node.
  if (V.getResNo() != 1)
    return SDValue();

  // Only these opcodes produce a usable carry/borrow flag.
  switch (V.getOpcode()) {
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
  case ISD::UADDO:
  case ISD::USUBO:
    break;
  default:
    return SDValue();
  }

  EVT VT = V.getNode()->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked || TLI.getBooleanContents(V.getValueType()) ==
                    TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}
2563
2564
/// Given the operands of an add/sub operation, see if the 2nd operand is a
2565
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2566
/// the opcode and bypass the mask operation.
2567
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2568
7.39M
                                 SelectionDAG &DAG, const SDLoc &DL) {
2569
7.39M
  if (N1.getOpcode() != ISD::AND || 
!isOneOrOneSplat(N1->getOperand(1))64.2k
)
2570
7.38M
    return SDValue();
2571
4.82k
2572
4.82k
  EVT VT = N0.getValueType();
2573
4.82k
  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2574
4.67k
    return SDValue();
2575
147
2576
147
  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2577
147
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2578
147
  return DAG.getNode(IsAdd ? 
ISD::SUB142
:
ISD::ADD5
, DL, VT, N0, N1.getOperand(0));
2579
147
}
2580
2581
/// Helper for doing combines based on N0 and N1 being added to each other.
/// Called twice by visitADDLike (once per operand order), so the folds here
/// only need to match one orientation of the pattern.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                          SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  // Hoist one-use subtraction by non-opaque constant:
  //   (x - C) + y  ->  (x + y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }
  // Hoist one-use subtraction from non-opaque constant:
  //   (C - x) + y  ->  (y - x) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2660
2661
625
/// Combine an ISD::ADDC node (add producing a glue carry-out). Simplifies to
/// a plain ADD plus CARRY_FALSE whenever the carry output is dead or cannot
/// be set.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}
2690
2691
/// Unconditionally invert a boolean value by XOR'ing it with the value the
/// target uses to represent "true": all-ones for zero-or-negative-one
/// booleans, and 1 for zero-or-one or undefined boolean contents.
static SDValue flipBoolean(SDValue V, const SDLoc &DL,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = V.getValueType();
  SDValue TrueVal =
      TLI.getBooleanContents(VT) ==
              TargetLowering::ZeroOrNegativeOneBooleanContent
          ? DAG.getAllOnesConstant(DL, VT)
          : DAG.getConstant(1, DL, VT);
  return DAG.getNode(ISD::XOR, DL, VT, V, TrueVal);
}
2708
2709
/**
 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
 * then the flip also occurs if computing the inverse is the same cost.
 * This function returns an empty SDValue in case it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a boolean
 * no matter what, use flipBoolean.
 */
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
                                  const TargetLowering &TLI,
                                  bool Force) {
  // A constant can always be flipped for free.
  if (Force && isa<ConstantSDNode>(V))
    return flipBoolean(V, SDLoc(V), DAG, TLI);

  // Only an (xor X, C) can encode a "free" flip of X.
  if (V.getOpcode() != ISD::XOR)
    return SDValue();

  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
  if (!Const)
    return SDValue();

  EVT VT = V.getValueType();

  // The xor constant is a flip only if it equals the target's "true" value
  // for this type's boolean representation.
  bool IsFlip = false;
  switch(TLI.getBooleanContents(VT)) {
    case TargetLowering::ZeroOrOneBooleanContent:
      IsFlip = Const->isOne();
      break;
    case TargetLowering::ZeroOrNegativeOneBooleanContent:
      IsFlip = Const->isAllOnesValue();
      break;
    case TargetLowering::UndefinedBooleanContent:
      // Only the low bit matters when boolean contents are undefined.
      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
      break;
  }

  if (IsFlip)
    return V.getOperand(0);
  if (Force)
    return flipBoolean(V, SDLoc(V), DAG, TLI);
  return SDValue();
}
2750
2751
14.2k
/// Combine SADDO/UADDO nodes: add operations that also produce an
/// overflow/carry flag in result #1.
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // The remaining folds only apply to the unsigned flavor.
  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    // Try the UADDO-specific folds with both operand orderings.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
2797
2798
22.8k
/// UADDO-specific folds tried with both operand orderings from visitADDO.
/// N0/N1 are the (possibly swapped) operands of the UADDO node N.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  // These folds are scalar-only.
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
2821
2822
2.78k
/// Combine ADDE: add with a glue carry-in and glue carry-out.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS: when only the left operand is a constant,
  // swap so later combines can assume constants sit on the right.
  auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
  if (LHSC && !RHSC)
    return DAG.getNode(ISD::ADDE, DL, N->getVTList(),
                       RHS, LHS, Carry);

  // fold (adde x, y, false) -> (addc x, y): a known-false carry-in reduces
  // this to the carry-generating add.
  if (Carry.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), LHS, RHS);

  return SDValue();
}
2840
2841
41.6k
/// Combine ADDCARRY: add with a boolean carry-in and boolean carry-out.
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    // Only do this post-legalization if UADDO is available on the target.
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    // Resize the carry to the result type, then mask to the low bit.
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the asymmetric folds with both operand orderings.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2879
2880
/**
 * If we are facing some sort of diamond carry propagation pattern try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in operation required, but because the
 * carry is now linearized, other transforms can kick in and optimize the DAG.
 *
 * Patterns typically look something like
 *            (uaddo A, B)
 *             /       \
 *          Carry      Sum
 *            |          \
 *            | (addcarry *, 0, Z)
 *            |       /
 *             \   Carry
 *              |   /
 * (addcarry X, *, *)
 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
 * produce a combine with a single path for carry propagation.
 */
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                      SDValue X, SDValue Carry0, SDValue Carry1,
                                      SDNode *N) {
  // Both inputs must be carry results (result #1 of their producers).
  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
    return SDValue();
  if (Carry1.getOpcode() != ISD::UADDO)
    return SDValue();

  SDValue Z;

  /**
   * First look for a suitable Z. It will present itself in the form of
   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
   */
  if (Carry0.getOpcode() == ISD::ADDCARRY &&
      isNullConstant(Carry0.getOperand(1))) {
    Z = Carry0.getOperand(2);
  } else if (Carry0.getOpcode() == ISD::UADDO &&
             isOneConstant(Carry0.getOperand(1))) {
    // (uaddo Y, 1) is (addcarry Y, 0, true): materialize Z = true in the
    // target's setcc result type.
    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
  } else {
    // We couldn't find a suitable Z.
    return SDValue();
  }


  // Rewrite the diamond as (addcarry X, 0, (addcarry A, B, Z):Carry),
  // linearizing the carry chain.
  auto cancelDiamond = [&](SDValue A,SDValue B) {
    SDLoc DL(N);
    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
    Combiner.AddToWorklist(NewY.getNode());
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
                       DAG.getConstant(0, DL, X.getValueType()),
                       NewY.getValue(1));
  };

  /**
   *      (uaddo A, B)
   *           |
   *          Sum
   *           |
   * (addcarry *, 0, Z)
   */
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
  }

  /**
   * (addcarry A, 0, Z)
   *         |
   *        Sum
   *         |
   *  (uaddo *, B)
   */
  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
  }

  // Same as above with the uaddo's operands commuted.
  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
  }

  return SDValue();
}
2966
2967
/// ADDCARRY folds tried with both operand orderings from visitADDCARRY.
/// N0/N1 are the (possibly swapped) value operands of node N; CarryIn is the
/// boolean carry-in operand.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}
3005
3006
// Since it may not be valid to emit a fold to zero for vector initializers
3007
// check if we can before folding.
3008
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3009
132
                             SelectionDAG &DAG, bool LegalOperations) {
3010
132
  if (!VT.isVector())
3011
41
    return DAG.getConstant(0, DL, VT);
3012
91
  if (!LegalOperations || 
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)20
)
3013
71
    return DAG.getConstant(0, DL, VT);
3014
20
  return SDValue();
3015
20
}
3016
3017
167k
/// Combine an ISD::SUB node. The folds below are tried in order; earlier,
/// simpler canonicalizations take priority over later structural rewrites.
/// NOTE(review): the ordering of these folds is load-bearing — do not reorder.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Negation folds: N is (sub 0, N1).
  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold (A+C1)-C2 -> A+(C1-C2)
  if (N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC = DAG.FoldConstantArithmetic(
        ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
  }

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                                N11.getNode());
      assert(NewC && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold (A-C1)-C2 -> A-(C1+C2)
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue NewC = DAG.FoldConstantArithmetic(
        ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
  }

  // fold (c1-A)-c2 -> (c1-c2)-A
  if (N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
    SDValue NewC = DAG.FoldConstantArithmetic(
        ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
    assert(NewC && "Constant folding failed");
    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  // (x - y) - 1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Look for:
  //   sub y, (xor x, -1)
  // And if the target does not like this form then turn into:
  //   add (add x, y), 1
  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
  }

  // Hoist one-use addition by non-opaque constant:
  //   (x + C) - y  ->  (x - y) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
  }
  // y - (x + C)  ->  (y - x) - C
  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
  }
  // (x - C) - y  ->  (x - y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  }
  // (C - x) - y  ->  C - (x + y)
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
  }

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      // Match either operand ordering of the xor.
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC &&
        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}
3313
3314
3.57k
/// Combine SSUBSAT/USUBSAT: saturating subtraction.
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT OpVT = LHS.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (OpVT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (sub_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(RHS.getNode()))
      return LHS;
  }

  // fold (sub_sat x, undef) -> 0
  if (LHS.isUndef() || RHS.isUndef())
    return DAG.getConstant(0, DL, OpVT);

  // fold (sub_sat x, x) -> 0
  if (LHS == RHS)
    return DAG.getConstant(0, DL, OpVT);

  // fold (sub_sat c1, c2) -> c3
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.FoldConstantArithmetic(N->getOpcode(), DL, OpVT, LHS.getNode(),
                                      RHS.getNode());

  // fold (sub_sat x, 0) -> x, scalar edition
  if (isNullConstant(RHS))
    return LHS;

  return SDValue();
}
3350
3351
128
/// Combine SUBC: subtract producing a glue borrow-out in result #1.
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT OpVT = LHS.getValueType();
  SDLoc DL(N);

  // Builds the "no borrow" glue value used by every fold below.
  auto NoBorrow = [&] {
    return DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue);
  };

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, OpVT, LHS, RHS),
                     NoBorrow());

  // fold (subc x, x) -> 0 + no borrow
  if (LHS == RHS)
    return CombineTo(N, DAG.getConstant(0, DL, OpVT), NoBorrow());

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(RHS))
    return CombineTo(N, LHS, NoBorrow());

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(LHS))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, OpVT, RHS, LHS),
                     NoBorrow());

  return SDValue();
}
3378
3379
6.09k
/// Combine SSUBO/USUBO: subtract producing an overflow/borrow flag in
/// result #1.
SDValue DAGCombiner::visitSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SSUBO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (subo x, c) -> (addo x, -c)
  // Excluded when c is the minimum signed value, whose negation overflows.
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}
3417
3418
102
/// Combine SUBE: subtract with a glue borrow-in.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue BorrowIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y): with no incoming borrow this
  // reduces to the borrow-generating subtract.
  if (BorrowIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), LHS, RHS);

  return SDValue();
}
3429
3430
1.59k
/// Combine a SUBCARRY (subtract with boolean carry-in) node.
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  // Only after legalization if USUBO is supported for this type.
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}
3444
3445
138k
/// Combine an ISD::MUL node: constant folding, identity/strength reductions
/// (multiply by 0/1/-1/power-of-two), shift-based decompositions, and
/// reassociation.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, only splat constants participate in the scalar-style
    // folds below.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only use the combined constant if it folded to a constant
    // (i.e. the shift did not produce a non-constant node).
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3614
3615
/// Return true if divmod libcall is available.
/// Only simple scalar integer types i8..i128 can map to a DIVREM libcall;
/// vector and non-simple types always return false.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
  }

  // The target may not provide a libcall name for this call.
  return TLI.getLibcallName(LC) != nullptr;
}
3633
3634
/// Issue divrem if both quotient and remainder are needed.
/// Scans the other users of the dividend for a matching DIV/REM/DIVREM with
/// the same operands and replaces the whole group with one DIVREM node.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // Create the single DIVREM node producing both results.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // An existing DIVREM with the same operands can be reused directly.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Route DIV users to result 0 and REM users to result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3704
3705
18.1k
/// Fold trivial SDIV/UDIV/SREM/UREM cases shared by all four opcodes:
/// undef/zero operands, identical operands, and division by one.
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  unsigned Opc = N->getOpcode();
  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0
  // 0 % X -> 0
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  if (N0C && N0C->isNullValue())
    return N0;

  // X / X -> 1
  // X % X -> 0
  if (N0 == N1)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X
  // X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);

  return SDValue();
}
3750
3751
6.06k
/// Combine an ISD::SDIV node: constant folds, special divisors (-1 and
/// INT_MIN), strength reduction to UDIV, and SDIVREM formation.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3813
3814
6.57k
/// Shared SDIV lowering used by both visitSDIV and visitREM: expands signed
/// division by (+/-)power-of-two divisors into shift/add sequences, or builds
/// a multiply-based expansion via BuildSDIV for other constant divisors.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    // Negative powers of two are also accepted; the final select negates.
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3894
3895
4.95k
/// Combine an ISD::UDIV node: constant folds, divisor == -1 special case,
/// shift-based expansion via visitUDIVLike, and UDIVREM formation.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3951
3952
5.49k
/// Shared UDIV lowering used by both visitUDIV and visitREM: strength-reduces
/// power-of-two divisors to shifts, or builds a multiply-based expansion via
/// BuildUDIV for other constant divisors.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
3994
3995
// handles ISD::SREM and ISD::UREM
// Combines a remainder node: constant folds, masks for power-of-two
// divisors, rewrites X % C as X - (X/C)*C via the DIV combines, and forms
// DIVREM when the matching division also exists.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
4077
4078
2.21k
/// Combine an ISD::MULHS (signed multiply-high) node: trivial operands,
/// or expansion to a double-width MUL plus shift when that type is legal.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4124
4125
8.94k
/// Combine an ISD::MULHU (unsigned multiply-high) node: trivial operands,
/// power-of-two divisor shift tricks, or expansion to a double-width MUL.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
4180
4181
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4182
/// give the opcodes for the two computations that are being performed. Return
4183
/// true if a simplification was made.
4184
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  EVT LoVT = N->getValueType(0);
  EVT HiVT = N->getValueType(1);
  SDLoc DL(N);

  // When only the low result is live, emit just the low computation (if that
  // opcode is available after legalization).
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, LoVT))) {
    SDValue Lowered = DAG.getNode(LoOp, DL, LoVT, N->ops());
    return CombineTo(N, Lowered, Lowered);
  }

  // Symmetrically, when only the high result is live, emit just the high
  // computation.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(HiOp, HiVT))) {
    SDValue Lowered = DAG.getNode(HiOp, DL, HiVT, N->ops());
    return CombineTo(N, Lowered, Lowered);
  }

  // Both results are live: nothing to split apart.
  if (LoExists && HiExists)
    return SDValue();

  // Otherwise exactly one half is live; see whether that half, computed as a
  // standalone node, simplifies further on its own.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, DL, LoVT, N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, DL, HiVT, N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
4229
4230
889
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, implement smul_lohi as one widened
  // multiply: sign-extend both operands, multiply in the wide type, then
  // recover the high half with a shift and both halves with truncates.
  if (VT.isSimple() && !VT.isVector()) {
    unsigned Bits = VT.getSimpleVT().getSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Bits * 2);
    if (TLI.isOperationLegal(ISD::MUL, WideVT)) {
      SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, WideVT, N->getOperand(0));
      SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, WideVT, N->getOperand(1));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, WideVT, LHS, RHS);
      // High half: shift the wide product down by the narrow bit width.
      SDValue Hi =
          DAG.getNode(ISD::SRL, DL, WideVT, Mul,
                      DAG.getConstant(Bits, DL, getShiftAmountTy(WideVT)));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Low half: truncate the wide product.
      SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4260
4261
7.81k
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, implement umul_lohi as one widened
  // multiply: zero-extend both operands, multiply in the wide type, then
  // recover the high half with a shift and both halves with truncates.
  if (VT.isSimple() && !VT.isVector()) {
    unsigned Bits = VT.getSimpleVT().getSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Bits * 2);
    if (TLI.isOperationLegal(ISD::MUL, WideVT)) {
      SDValue LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(0));
      SDValue RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(1));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, WideVT, LHS, RHS);
      // High half: shift the wide product down by the narrow bit width.
      SDValue Hi =
          DAG.getNode(ISD::SRL, DL, WideVT, Mul,
                      DAG.getConstant(Bits, DL, getShiftAmountTy(WideVT)));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Low half: truncate the wide product.
      SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
4291
4292
1.52k
SDValue DAGCombiner::visitMULO(SDNode *N) {
  // Fold (smulo/umulo x, 2) -> (saddo/uaddo x, x): doubling a value overflows
  // exactly when adding the value to itself does, and the overflow flag of the
  // matching addo node has the same signedness.
  if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)))
    if (C->getAPIntValue() == 2) {
      unsigned AddOpc =
          N->getOpcode() == ISD::SMULO ? ISD::SADDO : ISD::UADDO;
      return DAG.getNode(AddOpc, SDLoc(N), N->getVTList(), N->getOperand(0),
                         N->getOperand(0));
    }

  return SDValue();
}
4303
4304
41.7k
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Opcode = N->getOpcode();

  // Try the generic vector-binop simplifications first.
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Constant-fold when both operands are non-opaque constants.
  ConstantSDNode *LHSC = getAsNonOpaqueConstant(LHS);
  ConstantSDNode *RHSC = getAsNonOpaqueConstant(RHS);
  if (LHSC && RHSC)
    return DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, LHSC, RHSC);

  // Canonicalize a lone constant operand onto the RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.getNode(Opcode, SDLoc(N), VT, RHS, LHS);

  // If the sign bits of both operands are known zero, signed and unsigned
  // min/max agree, so we may flip between UMIN/UMAX and SMIN/SMAX. Only do
  // this when the current opcode isn't legal but the flipped one is.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (LHS.isUndef() || DAG.SignBitIsZero(LHS)) &&
      (RHS.isUndef() || DAG.SignBitIsZero(RHS))) {
    unsigned FlippedOpc;
    switch (Opcode) {
    case ISD::SMIN: FlippedOpc = ISD::UMIN; break;
    case ISD::SMAX: FlippedOpc = ISD::UMAX; break;
    case ISD::UMIN: FlippedOpc = ISD::SMIN; break;
    case ISD::UMAX: FlippedOpc = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(FlippedOpc, VT))
      return DAG.getNode(FlippedOpc, SDLoc(N), VT, LHS, RHS);
  }

  return SDValue();
}
4346
4347
/// If this is a bitwise logic instruction and both operands have the same
4348
/// opcode, try to sink the other opcode after the logic instruction.
4349
106k
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply: all of them need at least
  // one operand on each hand.
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  //        the instruction count for all transforms.

  // Handle size-changing casts:
  // logic_op (ext X), (ext Y) --> ext (logic_op X, Y)
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
      HandOpcode == ISD::SIGN_EXTEND) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND with an identical second operand:
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    if (XVT.isInteger() && XVT == Y.getValueType()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    // For XOR, the shared operand must be replaced by zero (x ^ x == 0),
    // so only proceed if a legal zero vector can be materialized.
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}
4508
4509
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4510
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Both operands must be setcc (or setcc-equivalent) nodes; extract their
  // compare operands and condition codes.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares are against the same RHS with the same predicate: try the
  // "combine LHS values with one or/and, compare once" family of folds.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    // TODO - support non-uniform vector amounts.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
        }
      }
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
4655
4656
/// This contains all DAGCombine rules which reduce two values combined by
4657
/// an And operation to a single value. This makes them reusable in the context
4658
/// of visitSELECT(). Rules involving constants are not included as
4659
/// visitSELECT() already handles those cases.
4660
744k
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // Folds that combine two setcc operands of the 'and'.
  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    return V;

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        APInt ADDC = ADDI->getAPIntValue();
        APInt SRLC = SRLI->getAPIntValue();
        if (ADDC.getMinSignedBits() <= 64 &&
            SRLC.ult(VT.getSizeInBits()) &&
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLC.getZExtValue());
          // Setting the top bits is safe only when they are known zero in
          // the add's constant as seen through the masking 'and'.
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL0(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL0, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(N, 0);
            }
          }
        }
      }
    }
  }

  // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Size = VT.getSizeInBits();
        const APInt &AndMask = CAnd->getAPIntValue();
        unsigned ShiftBits = CShift->getZExtValue();

        // Bail out, this node will probably disappear anyway.
        if (ShiftBits == 0)
          return SDValue();

        unsigned MaskBits = AndMask.countTrailingOnes();
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);

        if (AndMask.isMask() &&
            // Required bits must not span the two halves of the integer and
            // must fit in the half size type.
            (ShiftBits + MaskBits <= Size / 2) &&
            TLI.isNarrowingProfitable(VT, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            TLI.isTruncateFree(VT, HalfVT) &&
            TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable is to avoid regressions on PPC and
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
          // on downstream users of this. Those patterns could probably be
          // extended to handle extensions mixed in.

          SDValue SL(N0);
          assert(MaskBits <= Size);

          // Extracting the highest bit of the low half.
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
                                      N0.getOperand(0));

          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
        }
      }
    }
  }

  return SDValue();
}
4754
4755
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  // The AND constant must be a low-bits mask; its trailing-ones count is the
  // number of bits the load actually has to produce.
  if (!AndC->getAPIntValue().isMask())
    return false;

  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  EVT LoadedVT = LoadN->getMemoryVT();

  // If the mask exactly covers the loaded width, a ZEXTLOAD will match
  // without changing the size of the value being loaded.
  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)))
    return true;

  // Anything past this point would narrow the memory access, which is
  // not allowed for a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  // The narrower extending load must be legal once legalization has begun.
  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  // Finally defer to the target's profitability hook.
  return TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT);
}
4791
4792
// Decide whether the memory access of \p LDST may be narrowed to \p MemVT,
// where \p ShAmt is the bit offset of the narrowed access within the original
// one (it must be byte aligned) and \p ExtType is the extension kind a
// narrowed load would use. Returns true only if every legality and
// profitability check passes.
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                    ISD::LoadExtType ExtType, EVT &MemVT,
                                    unsigned ShAmt) {
  if (!LDST)
    return false;
  // Only allow byte offsets.
  if (ShAmt % 8)
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!MemVT.isRound())
    return false;

  // Don't change the width of a volatile load.
  if (LDST->isVolatile())
    return false;

  // Verify that we are actually reducing a load width here.
  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
    return false;

  // Ensure that this isn't going to produce an unsupported unaligned access.
  // ShAmt / 8 converts the bit offset into the byte offset of the access.
  if (ShAmt &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                              LDST->getAddressSpace(), ShAmt / 8,
                              LDST->getMemOperand()->getFlags()))
    return false;

  // It's not possible to generate a constant of extended or untyped type.
  EVT PtrType = LDST->getBasePtr().getValueType();
  if (PtrType == MVT::Untyped || PtrType.isExtended())
    return false;

  if (isa<LoadSDNode>(LDST)) {
    LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform one with multiple uses, this would require adding a new
    // load.
    if (!SDValue(Load, 0).hasOneUse())
      return false;

    if (LegalOperations &&
        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
      return false;

    // For the transform to be legal, the load must produce only two values
    // (the value loaded and the chain).  Don't transform a pre-increment
    // load, for example, which produces an extra value.  Otherwise the
    // transformation is not equivalent, and the downstream logic to replace
    // uses gets things wrong.
    if (Load->getNumValues() > 2)
      return false;

    // If the load that we're shrinking is an extload and we're not just
    // discarding the extension we can't simply shrink the load. Bail.
    // TODO: It would be possible to merge the extensions in some cases.
    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    // Target-specific veto on reducing the load width.
    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
      return false;
  } else {
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
    // Can't write outside the original store
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    // After legalization, the narrowed store must be a legal trunc-store.
    if (LegalOperations &&
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
      return false;
  }
  return true;
}
4867
4868
// Recursively walk the operand tree under \p N, checking whether an AND with
// the low-bit-mask constant \p Mask can be propagated back to the leaves:
// - Loads that can be narrowed to the mask width are appended to \p Loads.
// - OR/XOR nodes whose constant operand has bits outside the mask are
//   recorded in \p NodesWithConsts so the constant can be narrowed later.
// - At most one other value-producing node is tolerated; it is returned via
//   \p NodeToMask and must be explicitly re-masked by the caller.
// Returns true if the whole tree is compatible with the mask.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // Only OR/XOR can reintroduce bits outside the mask; a constant with
      // such bits must be narrowed by the caller.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Multiple uses would make replacing the value unsound.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the asserted width is the effective source width.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through to the single-node-to-mask handling below.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Logic nodes are transparent to the mask; recurse into them.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue and chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4956
4957
619k
// Given an AND node \p N whose RHS is a low-bit-mask constant, try to push
// the mask back up through the operand tree (see SearchForAndLoads) so the
// loads feeding it can be narrowed and the AND itself removed. Returns true
// and rewrites the DAG on success.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only low-bit masks (0..01..1) can be expressed as zext-loads.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewrote the new AND's own operand; restore it so
      // the AND still reads the original (unmasked) value.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant (if any) ends up in Op1.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // Same operand-restore dance as for FixupNode above.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // The legality was already established by SearchForAndLoads, so the
      // narrowing must succeed here.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The AND is now redundant; forward its users to its LHS.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
5024
5025
// Unfold
5026
//    x &  (-1 'logical shift' y)
5027
// To
5028
//    (x 'opposite logical shift' y) 'logical shift' y
5029
// if it is better for performance.
5030
446k
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5031
446k
  assert(N->getOpcode() == ISD::AND);
5032
446k
5033
446k
  SDValue N0 = N->getOperand(0);
5034
446k
  SDValue N1 = N->getOperand(1);
5035
446k
5036
446k
  // Do we actually prefer shifts over mask?
5037
446k
  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5038
297k
    return SDValue();
5039
149k
5040
149k
  // Try to match  (-1 '[outer] logical shift' y)
5041
149k
  unsigned OuterShift;
5042
149k
  unsigned InnerShift; // The opposite direction to the OuterShift.
5043
149k
  SDValue Y;           // Shift amount.
5044
299k
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5045
299k
    if (!M.hasOneUse())
5046
70.3k
      return false;
5047
229k
    OuterShift = M->getOpcode();
5048
229k
    if (OuterShift == ISD::SHL)
5049
3.24k
      InnerShift = ISD::SRL;
5050
225k
    else if (OuterShift == ISD::SRL)
5051
18.7k
      InnerShift = ISD::SHL;
5052
207k
    else
5053
207k
      return false;
5054
22.0k
    if (!isAllOnesConstant(M->getOperand(0)))
5055
21.4k
      return false;
5056
583
    Y = M->getOperand(1);
5057
583
    return true;
5058
583
  };
5059
149k
5060
149k
  SDValue X;
5061
149k
  if (matchMask(N1))
5062
123
    X = N0;
5063
149k
  else if (matchMask(N0))
5064
460
    X = N1;
5065
149k
  else
5066
149k
    return SDValue();
5067
583
5068
583
  SDLoc DL(N);
5069
583
  EVT VT = N->getValueType(0);
5070
583
5071
583
  //     tmp = x   'opposite logical shift' y
5072
583
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5073
583
  //     ret = tmp 'logical shift' y
5074
583
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5075
583
5076
583
  return T1;
5077
583
}
5078
5079
834k
// Main combine entry point for ISD::AND nodes: constant folding and
// canonicalization, vector identities, load-narrowing/zextload formation,
// bswap recognition, and a handful of algebraic folds. Returns the
// replacement value, SDValue(N, 0) when N was updated in place, or a null
// SDValue when nothing applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // ~c, truncated to the pre-extension width: the bits the AND clears.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: is the AND removable for this load's extension kind?
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  return SDValue();
}
5363
5364
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// \p N is the OR (or equivalent) node being combined; \p N0 and \p N1 are
/// its two halves. When \p DemandHighBits is true the caller requires all
/// bits above the low halfword to end up zero.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Only profitable/checkable once operations must be legal.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so that N0 ends up as the AND-of-SHL side and N1 as the
  // AND-of-SRL side when the pattern is presented swapped.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    // Look through the AND to the shift underneath.
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    // Look through the AND to the shift underneath.
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After looking through the ANDs, require exactly (shl .., 8) | (srl .., 8).
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must operate on the same source value 'a'.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a), then shift the swapped halfword down into the low bits
  // for types wider than 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
5476
5477
/// Return true if the specified node is an element that makes up a 32-bit
5478
/// packed halfword byteswap.
5479
/// ((x & 0x000000ff) << 8) |
5480
/// ((x & 0x0000ff00) >> 8) |
5481
/// ((x & 0x00ff0000) << 8) |
5482
/// ((x & 0xff000000) >> 8)
5483
3.25k
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5484
3.25k
  if (!N.getNode()->hasOneUse())
5485
299
    return false;
5486
2.95k
5487
2.95k
  unsigned Opc = N.getOpcode();
5488
2.95k
  if (Opc != ISD::AND && 
Opc != ISD::SHL2.76k
&&
Opc != ISD::SRL2.01k
)
5489
1.97k
    return false;
5490
977
5491
977
  SDValue N0 = N.getOperand(0);
5492
977
  unsigned Opc0 = N0.getOpcode();
5493
977
  if (Opc0 != ISD::AND && 
Opc0 != ISD::SHL433
&&
Opc0 != ISD::SRL402
)
5494
303
    return false;
5495
674
5496
674
  ConstantSDNode *N1C = nullptr;
5497
674
  // SHL or SRL: look upstream for AND mask operand
5498
674
  if (Opc == ISD::AND)
5499
130
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5500
544
  else if (Opc0 == ISD::AND)
5501
544
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5502
674
  if (!N1C)
5503
16
    return false;
5504
658
5505
658
  unsigned MaskByteOffset;
5506
658
  switch (N1C->getZExtValue()) {
5507
658
  default:
5508
620
    return false;
5509
658
  
case 0xFF: MaskByteOffset = 0; break10
;
5510
658
  
case 0xFF00: MaskByteOffset = 1; break4
;
5511
658
  case 0xFFFF:
5512
6
    // In case demanded bits didn't clear the bits that will be shifted out.
5513
6
    // This is needed for X86.
5514
6
    if (Opc == ISD::SRL || 
(2
Opc == ISD::AND2
&&
Opc0 == ISD::SHL2
)) {
5515
6
      MaskByteOffset = 1;
5516
6
      break;
5517
6
    }
5518
0
    return false;
5519
10
  case 0xFF0000:   MaskByteOffset = 2; break;
5520
8
  case 0xFF000000: MaskByteOffset = 3; break;
5521
38
  }
5522
38
5523
38
  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5524
38
  if (Opc == ISD::AND) {
5525
24
    if (MaskByteOffset == 0 || 
MaskByteOffset == 218
) {
5526
12
      // (x >> 8) & 0xff
5527
12
      // (x >> 8) & 0xff0000
5528
12
      if (Opc0 != ISD::SRL)
5529
0
        return false;
5530
12
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5531
12
      if (!C || C->getZExtValue() != 8)
5532
0
        return false;
5533
12
    } else {
5534
12
      // (x << 8) & 0xff00
5535
12
      // (x << 8) & 0xff000000
5536
12
      if (Opc0 != ISD::SHL)
5537
0
        return false;
5538
12
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5539
12
      if (!C || C->getZExtValue() != 8)
5540
0
        return false;
5541
14
    }
5542
14
  } else if (Opc == ISD::SHL) {
5543
8
    // (x & 0xff) << 8
5544
8
    // (x & 0xff0000) << 8
5545
8
    if (MaskByteOffset != 0 && 
MaskByteOffset != 24
)
5546
0
      return false;
5547
8
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5548
8
    if (!C || C->getZExtValue() != 8)
5549
0
      return false;
5550
6
  } else { // Opc == ISD::SRL
5551
6
    // (x & 0xff00) >> 8
5552
6
    // (x & 0xff000000) >> 8
5553
6
    if (MaskByteOffset != 1 && 
MaskByteOffset != 32
)
5554
0
      return false;
5555
6
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5556
6
    if (!C || C->getZExtValue() != 8)
5557
0
      return false;
5558
38
  }
5559
38
5560
38
  if (Parts[MaskByteOffset])
5561
0
    return false;
5562
38
5563
38
  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5564
38
  return true;
5565
38
}
5566
5567
/// Match a 32-bit packed halfword bswap. That is
5568
/// ((x & 0x000000ff) << 8) |
5569
/// ((x & 0x0000ff00) >> 8) |
5570
/// ((x & 0x00ff0000) << 8) |
5571
/// ((x & 0xff000000) >> 8)
5572
/// => (rotl (bswap x), 16)
5573
219k
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5574
219k
  if (!LegalOperations)
5575
106k
    return SDValue();
5576
112k
5577
112k
  EVT VT = N->getValueType(0);
5578
112k
  if (VT != MVT::i32)
5579
57.2k
    return SDValue();
5580
55.3k
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5581
10.9k
    return SDValue();
5582
44.3k
5583
44.3k
  // Look for either
5584
44.3k
  // (or (or (and), (and)), (or (and), (and)))
5585
44.3k
  // (or (or (or (and), (and)), (and)), (and))
5586
44.3k
  if (N0.getOpcode() != ISD::OR)
5587
41.1k
    return SDValue();
5588
3.22k
  SDValue N00 = N0.getOperand(0);
5589
3.22k
  SDValue N01 = N0.getOperand(1);
5590
3.22k
  SDNode *Parts[4] = {};
5591
3.22k
5592
3.22k
  if (N1.getOpcode() == ISD::OR &&
5593
3.22k
      
N00.getNumOperands() == 2104
&&
N01.getNumOperands() == 292
) {
5594
92
    // (or (or (and), (and)), (or (and), (and)))
5595
92
    if (!isBSwapHWordElement(N00, Parts))
5596
86
      return SDValue();
5597
6
5598
6
    if (!isBSwapHWordElement(N01, Parts))
5599
0
      return SDValue();
5600
6
    SDValue N10 = N1.getOperand(0);
5601
6
    if (!isBSwapHWordElement(N10, Parts))
5602
0
      return SDValue();
5603
6
    SDValue N11 = N1.getOperand(1);
5604
6
    if (!isBSwapHWordElement(N11, Parts))
5605
2
      return SDValue();
5606
3.13k
  } else {
5607
3.13k
    // (or (or (or (and), (and)), (and)), (and))
5608
3.13k
    if (!isBSwapHWordElement(N1, Parts))
5609
3.12k
      return SDValue();
5610
4
    if (!isBSwapHWordElement(N01, Parts))
5611
0
      return SDValue();
5612
4
    if (N00.getOpcode() != ISD::OR)
5613
0
      return SDValue();
5614
4
    SDValue N000 = N00.getOperand(0);
5615
4
    if (!isBSwapHWordElement(N000, Parts))
5616
0
      return SDValue();
5617
4
    SDValue N001 = N00.getOperand(1);
5618
4
    if (!isBSwapHWordElement(N001, Parts))
5619
0
      return SDValue();
5620
8
  }
5621
8
5622
8
  // Make sure the parts are all coming from the same node.
5623
8
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5624
0
    return SDValue();
5625
8
5626
8
  SDLoc DL(N);
5627
8
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5628
8
                              SDValue(Parts[0], 0));
5629
8
5630
8
  // Result of the bswap should be rotated by 16. If it's not legal, then
5631
8
  // do  (x << 16) | (x >> 16).
5632
8
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5633
8
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5634
4
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5635
4
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5636
4
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5637
0
  return DAG.getNode(ISD::OR, DL, VT,
5638
0
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5639
0
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5640
0
}
5641
5642
/// This contains all DAGCombine rules which reduce two values combined by
5643
/// an Or operation to a single value \see visitANDLike().
5644
225k
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5645
225k
  EVT VT = N1.getValueType();
5646
225k
  SDLoc DL(N);
5647
225k
5648
225k
  // fold (or x, undef) -> -1
5649
225k
  if (!LegalOperations && 
(112k
N0.isUndef()112k
||
N1.isUndef()112k
))
5650
1
    return DAG.getAllOnesConstant(DL, VT);
5651
225k
5652
225k
  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5653
5.69k
    return V;
5654
219k
5655
219k
  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5656
219k
  if (N0.getOpcode() == ISD::AND && 
N1.getOpcode() == ISD::AND40.1k
&&
5657
219k
      // Don't increase # computations.
5658
219k
      
(15.8k
N0.getNode()->hasOneUse()15.8k
||
N1.getNode()->hasOneUse()46
)) {
5659
15.8k
    // We can only do this xform if we know that bits from X that are set in C2
5660
15.8k
    // but not in C1 are already zero.  Likewise for Y.
5661
15.8k
    if (const ConstantSDNode *N0O1C =
5662
5.23k
        getAsNonOpaqueConstant(N0.getOperand(1))) {
5663
5.23k
      if (const ConstantSDNode *N1O1C =
5664
5.15k
          getAsNonOpaqueConstant(N1.getOperand(1))) {
5665
5.15k
        // We can only do this xform if we know that bits from X that are set in
5666
5.15k
        // C2 but not in C1 are already zero.  Likewise for Y.
5667
5.15k
        const APInt &LHSMask = N0O1C->getAPIntValue();
5668
5.15k
        const APInt &RHSMask = N1O1C->getAPIntValue();
5669
5.15k
5670
5.15k
        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5671
5.15k
            
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)388
) {
5672
35
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5673
35
                                  N0.getOperand(0), N1.getOperand(0));
5674
35
          return DAG.getNode(ISD::AND, DL, VT, X,
5675
35
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
5676
35
        }
5677
219k
      }
5678
5.23k
    }
5679
15.8k
  }
5680
219k
5681
219k
  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5682
219k
  if (N0.getOpcode() == ISD::AND &&
5683
219k
      
N1.getOpcode() == ISD::AND40.1k
&&
5684
219k
      
N0.getOperand(0) == N1.getOperand(0)15.8k
&&
5685
219k
      // Don't increase # computations.
5686
219k
      
(215
N0.getNode()->hasOneUse()215
||
N1.getNode()->hasOneUse()2
)) {
5687
213
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5688
213
                            N0.getOperand(1), N1.getOperand(1));
5689
213
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5690
213
  }
5691
219k
5692
219k
  return SDValue();
5693
219k
}
5694
5695
/// OR combines for which the commuted variant will be tried as well.
5696
static SDValue visitORCommutative(
5697
438k
    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5698
438k
  EVT VT = N0.getValueType();
5699
438k
  if (N0.getOpcode() == ISD::AND) {
5700
61.4k
    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5701
61.4k
    if (isBitwiseNot(N0.getOperand(1)) && 
N0.getOperand(1).getOperand(0) == N17.30k
)
5702
50
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5703
61.3k
5704
61.3k
    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5705
61.3k
    if (isBitwiseNot(N0.getOperand(0)) && 
N0.getOperand(0).getOperand(0) == N12.05k
)
5706
14
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5707
438k
  }
5708
438k
5709
438k
  return SDValue();
5710
438k
}
5711
5712
228k
SDValue DAGCombiner::visitOR(SDNode *N) {
5713
228k
  SDValue N0 = N->getOperand(0);
5714
228k
  SDValue N1 = N->getOperand(1);
5715
228k
  EVT VT = N1.getValueType();
5716
228k
5717
228k
  // x | x --> x
5718
228k
  if (N0 == N1)
5719
18
    return N0;
5720
228k
5721
228k
  // fold vector ops
5722
228k
  if (VT.isVector()) {
5723
40.7k
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
5724
16
      return FoldedVOp;
5725
40.7k
5726
40.7k
    // fold (or x, 0) -> x, vector edition
5727
40.7k
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
5728
89
      return N1;
5729
40.6k
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
5730
197
      return N0;
5731
40.4k
5732
40.4k
    // fold (or x, -1) -> -1, vector edition
5733
40.4k
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
5734
5
      // do not return N0, because undef node may exist in N0
5735
5
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5736
40.4k
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
5737
1
      // do not return N1, because undef node may exist in N1
5738
1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5739
40.4k
5740
40.4k
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5741
40.4k
    // Do this only if the resulting shuffle is legal.
5742
40.4k
    if (isa<ShuffleVectorSDNode>(N0) &&
5743
40.4k
        
isa<ShuffleVectorSDNode>(N1)254
&&
5744
40.4k
        // Avoid folding a node with illegal type.
5745
40.4k
        
TLI.isTypeLegal(VT)205
) {
5746
204
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5747
204
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5748
204
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5749
204
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5750
204
      // Ensure both shuffles have a zero input.
5751
204
      if ((ZeroN00 != ZeroN01) && 
(ZeroN10 != ZeroN11)68
) {
5752
68
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5753
68
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5754
68
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5755
68
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5756
68
        bool CanFold = true;
5757
68
        int NumElts = VT.getVectorNumElements();
5758
68
        SmallVector<int, 4> Mask(NumElts);
5759
68
5760
302
        for (int i = 0; i != NumElts; 
++i234
) {
5761
241
          int M0 = SV0->getMaskElt(i);
5762
241
          int M1 = SV1->getMaskElt(i);
5763
241
5764
241
          // Determine if either index is pointing to a zero vector.
5765
241
          bool M0Zero = M0 < 0 || 
(ZeroN00 == (M0 < NumElts))240
;
5766
241
          bool M1Zero = M1 < 0 || 
(ZeroN10 == (M1 < NumElts))240
;
5767
241
5768
241
          // If one element is zero and the otherside is undef, keep undef.
5769
241
          // This also handles the case that both are undef.
5770
241
          if ((M0Zero && 
M1 < 095
) ||
(240
M1Zero240
&&
M0 < 0143
)) {
5771
1
            Mask[i] = -1;
5772
1
            continue;
5773
1
          }
5774
240
5775
240
          // Make sure only one of the elements is zero.
5776
240
          if (M0Zero == M1Zero) {
5777
7
            CanFold = false;
5778
7
            break;
5779
7
          }
5780
233
5781
233
          assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5782
233
5783
233
          // We have a zero and non-zero element. If the non-zero came from
5784
233
          // SV0 make the index a LHS index. If it came from SV1, make it
5785
233
          // a RHS index. We need to mod by NumElts because we don't care
5786
233
          // which operand it came from in the original shuffles.
5787
233
          Mask[i] = M1Zero ? 
M0 % NumElts141
:
(M1 % NumElts) + NumElts92
;
5788
233
        }
5789
68
5790
68
        if (CanFold) {
5791
61
          SDValue NewLHS = ZeroN00 ? 
N0.getOperand(1)2
:
N0.getOperand(0)59
;
5792
61
          SDValue NewRHS = ZeroN10 ? 
N1.getOperand(1)2
:
N1.getOperand(0)59
;
5793
61
5794
61
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5795
61
          if (!LegalMask) {
5796
0
            std::swap(NewLHS, NewRHS);
5797
0
            ShuffleVectorSDNode::commuteMask(Mask);
5798
0
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5799
0
          }
5800
61
5801
61
          if (LegalMask)
5802
61
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5803
228k
        }
5804
68
      }
5805
204
    }
5806
40.4k
  }
5807
228k
5808
228k
  // fold (or c1, c2) -> c1|c2
5809
228k
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5810
228k
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5811
228k
  if (N0C && 
N1C1.93k
&&
!N1C->isOpaque()132
)
5812
130
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5813
228k
  // canonicalize constant to RHS
5814
228k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5815
228k
     
!DAG.isConstantIntBuildVectorOrConstantInt(N1)3.28k
)
5816
1.80k
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5817
226k
  // fold (or x, 0) -> x
5818
226k
  if (isNullConstant(N1))
5819
838
    return N0;
5820
225k
  // fold (or x, -1) -> -1
5821
225k
  if (isAllOnesConstant(N1))
5822
85
    return N1;
5823
225k
5824
225k
  if (SDValue NewSel = foldBinOpIntoSelect(N))
5825
54
    return NewSel;
5826
225k
5827
225k
  // fold (or x, c) -> c iff (x & ~c) == 0
5828
225k
  if (N1C && 
DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())62.4k
)
5829
27
    return N1;
5830
225k
5831
225k
  if (SDValue Combined = visitORLike(N0, N1, N))
5832
5.93k
    return Combined;
5833
219k
5834
219k
  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5835
219k
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5836
8
    return BSwap;
5837
219k
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5838
13
    return BSwap;
5839
219k
5840
219k
  // reassociate or
5841
219k
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5842
80
    return ROR;
5843
219k
5844
219k
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5845
219k
  // iff (c1 & c2) != 0 or c1/c2 are undef.
5846
219k
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5847
2.87k
    return !C1 || 
!C22.86k
||
C1->getAPIntValue().intersects(C2->getAPIntValue())2.86k
;
5848
2.87k
  };
5849
219k
  if (N0.getOpcode() == ISD::AND && 
N0.getNode()->hasOneUse()39.8k
&&
5850
219k
      
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)38.0k
) {
5851
69
    if (SDValue COR = DAG.FoldConstantArithmetic(
5852
69
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5853
69
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5854
69
      AddToWorklist(IOR.getNode());
5855
69
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5856
69
    }
5857
219k
  }
5858
219k
5859
219k
  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5860
7
    return Combined;
5861
219k
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5862
57
    return Combined;
5863
219k
5864
219k
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5865
219k
  if (N0.getOpcode() == N1.getOpcode())
5866
53.0k
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5867
799
      return V;
5868
218k
5869
218k
  // See if this is some rotate idiom.
5870
218k
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5871
1.87k
    return SDValue(Rot, 0);
5872
216k
5873
216k
  if (SDValue Load = MatchLoadCombine(N))
5874
226
    return Load;
5875
216k
5876
216k
  // Simplify the operands using demanded-bits information.
5877
216k
  if (SimplifyDemandedBits(SDValue(N, 0)))
5878
8.92k
    return SDValue(N, 0);
5879
207k
5880
207k
  // If OR can be rewritten into ADD, try combines based on ADD.
5881
207k
  if ((!LegalOperations || 
TLI.isOperationLegal(ISD::ADD, VT)108k
) &&
5882
207k
      
DAG.haveNoCommonBitsSet(N0, N1)202k
)
5883
87.2k
    if (SDValue Combined = visitADDLike(N))
5884
374
      return Combined;
5885
207k
5886
207k
  return SDValue();
5887
207k
}
5888
5889
373k
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5890
373k
  if (Op.getOpcode() == ISD::AND &&
5891
373k
      
DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))39.4k
) {
5892
32.9k
    Mask = Op.getOperand(1);
5893
32.9k
    return Op.getOperand(0);
5894
32.9k
  }
5895
340k
  return Op;
5896
340k
}
5897
5898
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
5899
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5900
316k
                            SDValue &Mask) {
5901
316k
  Op = stripConstantMask(DAG, Op, Mask);
5902
316k
  if (Op.getOpcode() == ISD::SRL || 
Op.getOpcode() == ISD::SHL301k
) {
5903
56.0k
    Shift = Op;
5904
56.0k
    return true;
5905
56.0k
  }
5906
260k
  return false;
5907
260k
}
5908
5909
/// Helper function for visitOR to extract the needed side of a rotate idiom
5910
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
5911
/// InstCombine merged some outside op with one of the shifts from
5912
/// the rotate pattern.
5913
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5914
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
5915
/// patterns:
5916
///
5917
///   (or (mul v c0) (shrl (mul v c1) c2)):
5918
///     expands (mul v c0) -> (shl (mul v c1) c3)
5919
///
5920
///   (or (udiv v c0) (shl (udiv v c1) c2)):
5921
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5922
///
5923
///   (or (shl v c0) (shrl (shl v c1) c2)):
5924
///     expands (shl v c0) -> (shl (shl v c1) c3)
5925
///
5926
///   (or (shrl v c0) (shl (shrl v c1) c2)):
5927
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5928
///
5929
/// Such that in all cases, c3+c2==bitwidth(op v c1).
5930
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5931
                                     SDValue ExtractFrom, SDValue &Mask,
5932
56.0k
                                     const SDLoc &DL) {
5933
56.0k
  assert(OppShift && ExtractFrom && "Empty SDValue");
5934
56.0k
  assert(
5935
56.0k
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5936
56.0k
      "Existing shift must be valid as a rotate half");
5937
56.0k
5938
56.0k
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5939
56.0k
  // Preconditions:
5940
56.0k
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5941
56.0k
  //
5942
56.0k
  // Find opcode of the needed shift to be extracted from (op0 v c0).
5943
56.0k
  unsigned Opcode = ISD::DELETED_NODE;
5944
56.0k
  bool IsMulOrDiv = false;
5945
56.0k
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5946
56.0k
  // opcode or its arithmetic (mul or udiv) variant.
5947
56.0k
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5948
56.0k
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5949
56.0k
    if (!IsMulOrDiv && 
ExtractFrom.getOpcode() != NeededShift56.0k
)
5950
35.3k
      return false;
5951
20.6k
    Opcode = NeededShift;
5952
20.6k
    return true;
5953
20.6k
  };
5954
56.0k
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
5955
56.0k
  // that the needed shift can be extracted from.
5956
56.0k
  if ((OppShift.getOpcode() != ISD::SRL || 
!SelectOpcode(ISD::SHL, ISD::MUL)15.0k
) &&
5957
56.0k
      
(45.7k
OppShift.getOpcode() != ISD::SHL45.7k
||
!SelectOpcode(ISD::SRL, ISD::UDIV)41.0k
))
5958
35.3k
    return SDValue();
5959
20.6k
5960
20.6k
  // op0 must be the same opcode on both sides, have the same LHS argument,
5961
20.6k
  // and produce the same value type.
5962
20.6k
  SDValue OppShiftLHS = OppShift.getOperand(0);
5963
20.6k
  EVT ShiftedVT = OppShiftLHS.getValueType();
5964
20.6k
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5965
20.6k
      
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0)223
||
5966
20.6k
      
ShiftedVT != ExtractFrom.getValueType()167
)
5967
20.5k
    return SDValue();
5968
167
5969
167
  // Amount of the existing shift.
5970
167
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5971
167
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5972
167
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5973
167
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5974
167
  ConstantSDNode *ExtractFromCst =
5975
167
      isConstOrConstSplat(ExtractFrom.getOperand(1));
5976
167
  // TODO: We should be able to handle non-uniform constant vectors for these values
5977
167
  // Check that we have constant values.
5978
167
  if (!OppShiftCst || 
!OppShiftCst->getAPIntValue()165
||
5979
167
      
!OppLHSCst165
||
!OppLHSCst->getAPIntValue()159
||
5980
167
      
!ExtractFromCst159
||
!ExtractFromCst->getAPIntValue()159
)
5981
8
    return SDValue();
5982
159
5983
159
  // Compute the shift amount we need to extract to complete the rotate.
5984
159
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5985
159
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5986
0
    return SDValue();
5987
159
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5988
159
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5989
159
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5990
159
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5991
159
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5992
159
5993
159
  // Now try extract the needed shift from the ExtractFrom op and see if the
5994
159
  // result matches up with the existing shift's LHS op.
5995
159
  if (IsMulOrDiv) {
5996
43
    // Op to extract from is a mul or udiv by a constant.
5997
43
    // Check:
5998
43
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5999
43
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6000
43
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6001
43
                                                 NeededShiftAmt.getZExtValue());
6002
43
    APInt ResultAmt;
6003
43
    APInt Rem;
6004
43
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6005
43
    if (Rem != 0 || 
ResultAmt != OppLHSAmt38
)
6006
29
      return SDValue();
6007
116
  } else {
6008
116
    // Op to extract from is a shift by a constant.
6009
116
    // Check:
6010
116
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
6011
116
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6012
116
                                          ExtractFromAmt.getBitWidth()))
6013
107
      return SDValue();
6014
23
  }
6015
23
6016
23
  // Return the expanded shift op that should allow a rotate to be formed.
6017
23
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
6018
23
  EVT ResVT = ExtractFrom.getValueType();
6019
23
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6020
23
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6021
23
}
6022
6023
// Return true if we can prove that, whenever Neg and Pos are both in the
6024
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6025
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6026
//
6027
//     (or (shift1 X, Neg), (shift2 X, Pos))
6028
//
6029
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6030
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6031
// to consider shift amounts with defined behavior.
6032
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6033
665
                           SelectionDAG &DAG) {
6034
665
  // If EltSize is a power of 2 then:
6035
665
  //
6036
665
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6037
665
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6038
665
  //
6039
665
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6040
665
  // for the stronger condition:
6041
665
  //
6042
665
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6043
665
  //
6044
665
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6045
665
  // we can just replace Neg with Neg' for the rest of the function.
6046
665
  //
6047
665
  // In other cases we check for the even stronger condition:
6048
665
  //
6049
665
  //     Neg == EltSize - Pos                                    [B]
6050
665
  //
6051
665
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6052
665
  // behavior if Pos == 0 (and consequently Neg == EltSize).
6053
665
  //
6054
665
  // We could actually use [A] whenever EltSize is a power of 2, but the
6055
665
  // only extra cases that it would match are those uninteresting ones
6056
665
  // where Neg and Pos are never in range at the same time.  E.g. for
6057
665
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6058
665
  // as well as (sub 32, Pos), but:
6059
665
  //
6060
665
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6061
665
  //
6062
665
  // always invokes undefined behavior for 32-bit X.
6063
665
  //
6064
665
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6065
665
  unsigned MaskLoBits = 0;
6066
665
  if (Neg.getOpcode() == ISD::AND && 
isPowerOf2_64(EltSize)152
) {
6067
152
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6068
152
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6069
152
      unsigned Bits = Log2_64(EltSize);
6070
152
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6071
152
          
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)128
) {
6072
126
        Neg = Neg.getOperand(0);
6073
126
        MaskLoBits = Bits;
6074
126
      }
6075
152
    }
6076
152
  }
6077
665
6078
665
  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6079
665
  if (Neg.getOpcode() != ISD::SUB)
6080
219
    return false;
6081
446
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6082
446
  if (!NegC)
6083
0
    return false;
6084
446
  SDValue NegOp1 = Neg.getOperand(1);
6085
446
6086
446
  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6087
446
  // Pos'.  The truncation is redundant for the purpose of the equality.
6088
446
  if (MaskLoBits && 
Pos.getOpcode() == ISD::AND92
) {
6089
86
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6090
86
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6091
86
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6092
86
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6093
86
           MaskLoBits))
6094
74
        Pos = Pos.getOperand(0);
6095
86
    }
6096
86
  }
6097
446
6098
446
  // The condition we need is now:
6099
446
  //
6100
446
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6101
446
  //
6102
446
  // If NegOp1 == Pos then we need:
6103
446
  //
6104
446
  //              EltSize & Mask == NegC & Mask
6105
446
  //
6106
446
  // (because "x & Mask" is a truncation and distributes through subtraction).
6107
446
  APInt Width;
6108
446
  if (Pos == NegOp1)
6109
435
    Width = NegC->getAPIntValue();
6110
11
6111
11
  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6112
11
  // Then the condition we want to prove becomes:
6113
11
  //
6114
11
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6115
11
  //
6116
11
  // which, again because "x & Mask" is a truncation, becomes:
6117
11
  //
6118
11
  //                NegC & Mask == (EltSize - PosC) & Mask
6119
11
  //             EltSize & Mask == (NegC + PosC) & Mask
6120
11
  else if (Pos.getOpcode() == ISD::ADD && 
Pos.getOperand(0) == NegOp13
) {
6121
3
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6122
3
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6123
0
    else
6124
0
      return false;
6125
8
  } else
6126
8
    return false;
6127
438
6128
438
  // Now we just need to check that EltSize & Mask == Width & Mask.
6129
438
  if (MaskLoBits)
6130
92
    // EltSize & Mask is 0 since Mask is EltSize - 1.
6131
92
    return Width.getLoBits(MaskLoBits) == 0;
6132
346
  return Width == EltSize;
6133
346
}
6134
6135
// A subroutine of MatchRotate used once we have found an OR of two opposite
6136
// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6137
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6138
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
6139
// Neg with outer conversions stripped away.
6140
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6141
                                       SDValue Neg, SDValue InnerPos,
6142
                                       SDValue InnerNeg, unsigned PosOpcode,
6143
665
                                       unsigned NegOpcode, const SDLoc &DL) {
6144
665
  // fold (or (shl x, (*ext y)),
6145
665
  //          (srl x, (*ext (sub 32, y)))) ->
6146
665
  //   (rotl x, y) or (rotr x, (sub 32, y))
6147
665
  //
6148
665
  // fold (or (shl x, (*ext (sub 32, y))),
6149
665
  //          (srl x, (*ext y))) ->
6150
665
  //   (rotr x, y) or (rotl x, (sub 32, y))
6151
665
  EVT VT = Shifted.getValueType();
6152
665
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6153
432
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6154
432
    return DAG.getNode(HasPos ? 
PosOpcode380
:
NegOpcode52
, DL, VT, Shifted,
6155
432
                       HasPos ? 
Pos380
:
Neg52
).getNode();
6156
432
  }
6157
233
6158
233
  return nullptr;
6159
233
}
6160
6161
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
//
// Returns the replacement rotate node, or nullptr if no rotate pattern was
// matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate: if both operands are truncates of the same
  // wider type, try to match a rotate at the wider type and truncate the
  // result.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      // The bits contributed by each half are masked by its own AND mask, OR'd
      // with the bits the other half cannot contribute (all-ones shifted).
      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try matching a variable rotate in both directions.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
6305
6306
namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  /// Create a provider describing byte \p ByteOffset of the value produced by
  /// \p Load.
  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  /// Create a provider for a byte known to be constant zero.
  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  // Private: use the named factories above so call sites state intent.
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace
6338
6339
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only if at least one side contributes constant
    // zero for that byte; otherwise the two providers conflict.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // A left shift by a whole number of bytes moves the providers up; bytes
    // below the shift amount become constant zero.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are zero only for a zero-extend; for
    // sign/any-extend their contents are unknown.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // Byte swap simply mirrors the byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // For an extending load, bytes past the memory width are zero only for
    // ZEXTLOAD; otherwise (SEXTLOAD/EXTLOAD) they are unknown.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  // Any other opcode: origin of the byte cannot be determined.
  return None;
}
6436
6437
15.8k
// In a little-endian BW-byte value, byte i of the value lives at memory
// offset i.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // Unused; kept for signature symmetry with BigEndianByteAt.
  return i;
}
6440
6441
4.81k
// In a big-endian BW-byte value, byte i of the value lives at the mirrored
// memory offset, counting down from the last byte.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return (BW - 1) - i;
}
6444
6445
// Check if the bytes offsets we are looking at match with either big or
6446
// little endian value loaded. Return true for big endian, false for little
6447
// endian, and None if match failed.
6448
static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6449
974
                                  int64_t FirstOffset) {
6450
974
  // The endian can be decided only when it is 2 bytes at least.
6451
974
  unsigned Width = ByteOffsets.size();
6452
974
  if (Width < 2)
6453
0
    return None;
6454
974
6455
974
  bool BigEndian = true, LittleEndian = true;
6456
5.14k
  for (unsigned i = 0; i < Width; 
i++4.16k
) {
6457
4.18k
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6458
4.18k
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6459
4.18k
    BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6460
4.18k
    if (!BigEndian && 
!LittleEndian3.52k
)
6461
16
      return None;
6462
4.18k
  }
6463
974
6464
974
  assert((BigEndian != LittleEndian) && "It should be either big endian or"
6465
958
                                        "little endian");
6466
958
  return BigEndian;
6467
974
}
6468
6469
3.55k
// Peel off any chain of truncate / zext / sext / anyext wrappers and return
// the underlying value. Iterative form of the original recursive helper.
static SDValue stripTruncAndExt(SDValue Value) {
  while (true) {
    switch (Value.getOpcode()) {
    case ISD::TRUNCATE:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ANY_EXTEND:
      Value = Value.getOperand(0);
      continue;
    }
    return Value;
  }
}
6479
6480
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
  // Collect all the stores in the chain.
  SDValue Chain;
  SmallVector<StoreSDNode *, 8> Stores;
  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
    // Only plain (non-volatile, unindexed) i8 stores participate.
    if (Store->getMemoryVT() != MVT::i8 ||
        Store->isVolatile() || Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // Handle the simple type only.
  unsigned Width = Stores.size();
  EVT VT = EVT::getIntegerVT(
    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
    return SDValue();

  // Check if all the bytes of the combined value we are looking at are stored
  // to the same base address. Collect bytes offsets from Base address into
  // ByteOffsets.
  SDValue CombinedValue;
  SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  Optional<BaseIndexOffset> Base;
  for (auto Store : Stores) {
    // All the stores store different byte of the CombinedValue. A truncate is
    // required to get that byte value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // A shift operation is required to get the right byte offset, except the
    // first byte.
    int64_t Offset = 0;
    SDValue Value = Trunc.getOperand(0);
    if (Value.getOpcode() == ISD::SRL ||
        Value.getOpcode() == ISD::SRA) {
      ConstantSDNode *ShiftOffset =
        dyn_cast<ConstantSDNode>(Value.getOperand(1));
      // Trying to match the following pattern. The shift offset must be
      // a constant and a multiple of 8. It is the byte offset in "y".
      //
      // x = srl y, offset
      // i8 z = trunc x
      // store z, ...
      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
        return SDValue();

     Offset = ShiftOffset->getSExtValue()/8;
     Value = Value.getOperand(0);
    }

    // Stores must share the same combined value with different offsets.
    if (!CombinedValue)
      CombinedValue = Value;
    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
      return SDValue();

    // The trunc and all the extend operation should be stripped to get the
    // real value we are stored.
    else if (CombinedValue.getValueType() != VT) {
      if (Value.getValueType() == VT ||
          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
        CombinedValue = Value;
      // Give up if the combined value type is smaller than the store size.
      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first byte store
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    // NOTE(review): Offset (int64_t) is compared against Width (unsigned);
    // usual arithmetic conversions make this a signed 64-bit compare, which
    // together with the Offset < 0 guard keeps the index in range.
    if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
      return SDValue();
    ByteOffsets[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check if the bytes of the combined value we are looking at match with
  // either big or little endian value store.
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single bswap if needed and store.

  // If the store needs byte swap check if the target supports it
  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a store of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // Narrow the combined value down to the width of the merged store if it was
  // built at a wider type.
  if (VT != CombinedValue.getValueType()) {
    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
           "Get unexpected store value to combine");
    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
                             CombinedValue);
  }

  if (NeedsBswap)
    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);

  SDValue NewStore =
    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
                 FirstStore->getPointerInfo(), FirstStore->getAlignment());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}
6643
6644
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Translate a provider's byte index within its load into a memory offset,
  // taking the target's endianness into account.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
6798
6799
// If the target has andn, bsl, or a similar bit-select instruction,
6800
// we want to unfold masked merge, with canonical pattern of:
6801
//   |        A  |  |B|
6802
//   ((x ^ y) & m) ^ y
6803
//    |  D  |
6804
// Into:
6805
//   (x & m) | (y & ~m)
6806
// If y is a constant, and the 'andn' does not work with immediates,
6807
// we unfold into a different pattern:
6808
//   ~(~x & m) & (m | y)
6809
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6810
//       the very least that breaks andnpd / andnps patterns, and because those
6811
//       patterns are simplified in IR and shouldn't be created in the DAG
6812
119k
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Bail out on a plain 'not' (i.e. where y = -1); see the note above about
  // andnpd / andnps patterns.
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // Each of the three commutable operators (the two xors and the and) doubles
  // the number of shapes the basic pattern can take, so probe all 8 variants.
  SDValue X, Y, M;
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue A = Xor.getOperand(0);
    SDValue B = Xor.getOperand(1);
    // Again, refuse to treat a plain 'not' as the inner xor.
    if (isAllOnesOrAllOnesSplat(B))
      return false;
    // Canonicalize so that 'Other' lines up with the second xor operand.
    if (Other == A)
      std::swap(A, B);
    if (Other != B)
      return false;
    X = A;
    Y = B;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool Matched = matchAndXor(N0, 0, N1) || matchAndXor(N0, 1, N1) ||
                 matchAndXor(N1, 0, N0) || matchAndXor(N1, 1, N0);
  if (!Matched)
    return SDValue();

  // A constant mask should never reach this point: InstCombine already
  // unfolds the pattern, and DAGCombiner should not be producing it either.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // Only profitable when the target provides an and-not instruction.
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // When Y is an immediate that 'andn' cannot consume, use the alternate form
  //   ~(~x & m) & (m | y)
  // which still routes through the and-not instruction.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue Inner = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotInner = DAG.getNOT(DL, Inner, VT);
    SDValue MOrY = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotInner, MOrY);
  }

  // General case: (x & m) | (y & ~m).
  SDValue XAndM = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue YAndNotM = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
  return DAG.getNode(ISD::OR, DL, VT, XAndM, YAndNotM);
}
6880
6881
352k
/// Combine an ISD::XOR node. Applies constant folding, canonicalization, and
/// the xor-specific folds listed inline below, in order; the first fold that
/// fires wins. Returns the replacement value, or an empty SDValue if nothing
/// applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // NOTE: N0Opcode is cached here; some later folds deliberately re-query
  // N0.getOpcode() after this point.
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType().isInteger());
    // Only invert the condition if it is legal (or we are pre-legalization).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    // Invert the setcc in its narrow type, then re-extend.
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: the inner op flips between AND and OR.
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }

  // fold (not (neg x)) -> (add X, -1)
  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
  // Y is a constant or the subtract has a single use.
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
                       DAG.getAllOnesConstant(DL, VT));
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    // The add and sra can appear on either side of the xor.
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // The sra must be a sign-bit splat (shift by bitwidth-1).
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
7081
7082
/// Handle transforms common to the three shifts, when the shift amount is a
7083
/// constant.
7084
/// We are looking for: (shift being one of shl/sra/srl)
7085
///   shift (binop X, C0), C1
7086
/// And want to transform into:
7087
///   binop (shift X, C1), (shift C0, C1)
7088
524k
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // NOTE: Amt is not read in this body; the shift amount is taken from
  // N->getOperand(1) directly.

  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  // The inner binop must be one-use, since we want to replace it.
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  switch (LHS->getOpcode()) {
  default:
    return SDValue();
  case ISD::OR:
  case ISD::XOR:
  case ISD::AND:
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst)
    return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when figure out it's exactly
  // profitable.
  SDValue BinOpLHSVal = LHS->getOperand(0);
  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
                            BinOpLHSVal.getOpcode() == ISD::SRA ||
                            BinOpLHSVal.getOpcode() == ISD::SRL) &&
                           isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal.getOpcode() == ISD::SELECT;

  if (!IsShiftByConstant && !IsCopyOrSelect)
    return SDValue();

  // For copy/select sources, only transform when the shift itself has more
  // than one use.
  if (IsCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // Give the target a veto before committing to the commute.
  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
7155
7156
2.44k
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  // Require both nodes to be single-use and the narrow AND to be desirable.
  EVT TruncVT = N->getValueType(0);
  if (!N->hasOneUse() || !N->getOperand(0).hasOneUse() ||
      !TLI.isTypeDesirableForOp(ISD::AND, TruncVT))
    return SDValue();

  SDValue MaskOp = N->getOperand(0).getOperand(1);
  if (!isConstantOrConstantVector(MaskOp, /* NoOpaques */ true))
    return SDValue();

  // Truncate both the source and the mask, then AND them in the narrow type.
  SDLoc DL(N);
  SDValue SrcOp = N->getOperand(0).getOperand(0);
  SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, SrcOp);
  SDValue TruncMask = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, MaskOp);
  AddToWorklist(TruncSrc.getNode());
  AddToWorklist(TruncMask.getNode());
  return DAG.getNode(ISD::AND, DL, TruncVT, TruncSrc, TruncMask);
}
7178
7179
7.80k
/// Combine an ISD::ROTL / ISD::ROTR node: eliminate no-op rotates, reduce
/// over-wide rotate amounts, push truncate-of-and into the amount operand,
/// and merge nested rotates. Returns the replacement value, or an empty
/// SDValue if no fold applied.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  // (Power-of-two bitwidth lets the modulo test be a mask check.)
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  // TODO - support non-uniform vector amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same direction adds the amounts; opposite directions subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        // Normalize the combined amount into [0, Bitsize) via srem.
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
7236
7237
338k
/// Combine an ISD::SHL node. Applies constant folding and the shl-specific
/// folds listed inline below, in order; the first fold that fires wins.
/// Returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Combined amount >= bitwidth: result is known zero.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // Combined amount < bitwidth: merge into a single shift.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      // The inner amount may be a different type; widen/truncate before add.
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  // TODO - support non-uniform vector shift amounts.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits the srl left defined; adjusted below to track the
        // net shift direction.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
7486
7487
48.8k
/// Combine an ISD::SRA (arithmetic shift right) node.
/// Tries a sequence of local folds; returns the replacement value, or an
/// empty SDValue if no combine applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >> c2 (constant fold both operands)
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // The shl/sra pair sign-extends the low (OpSizeInBits - c1) bits of x.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Per-element: sum the two shift amounts, clamping an out-of-range sum
    // to OpSizeInBits - 1 (sra saturates at the sign bit).
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        // The inner shift exactly cancels the truncation, so the combined
        // operation is one wide sra followed by the truncate.
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
                                      getShiftAmountTy(LargeVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
7646
7647
275k
/// Combine an ISD::SRL (logical shift right) node.
/// Tries a sequence of local folds; returns the replacement value, or an
/// empty SDValue if no combine applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // Per-element: true when the summed shift amount shifts out every bit.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // Per-element: true when the summed shift amount stays in range.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // The mask is all-ones shifted right by the same amount.
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that the any_extend left undefined.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
7860
7861
922
/// Combine an ISD::FSHL/ISD::FSHR (funnel shift) node.
/// Returns the replacement value, or an empty SDValue if no combine applies.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  // Treat undef and zero (or zero-splat, allowing undef lanes) operands the
  // same: both let the funnel shift degenerate into a plain shift.
  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff We know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
7935
7936
2.37k
/// Combine an ISD::ABS node: constant-fold, collapse a nested abs, and drop
/// the abs entirely when the operand's sign bit is known zero.
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2 : re-emit through getNode so it constant folds.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, Op);

  // fold (abs (abs x)) -> (abs x) : abs is idempotent.
  if (Op.getOpcode() == ISD::ABS)
    return Op;

  // fold (abs x) -> x iff not-negative.
  if (DAG.SignBitIsZero(Op))
    return Op;

  return SDValue();
}
7951
7952
2.28k
/// Combine an ISD::BSWAP node: constant-fold, and cancel a pair of
/// back-to-back byte swaps.
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2 : re-emit through getNode so it constant folds.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, Op);

  // fold (bswap (bswap x)) -> x : two swaps restore the original order.
  if (Op.getOpcode() == ISD::BSWAP)
    return Op->getOperand(0);

  return SDValue();
}
7964
7965
772
/// Combine an ISD::BITREVERSE node: constant-fold, and cancel a pair of
/// back-to-back bit reversals.
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2 : re-emit through getNode so it constant folds.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, Op);

  // fold (bitreverse (bitreverse x)) -> x : two reversals cancel.
  if (Op.getOpcode() == ISD::BITREVERSE)
    return Op.getOperand(0);

  return SDValue();
}
7977
7978
5.87k
/// Combine an ISD::CTLZ node: constant-fold, and switch to the zero-undef
/// variant when the operand is known never zero.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2 : re-emit through getNode so it constant folds.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, Op);

  // If the value is known never to be zero, switch to the undef version
  // (only when that operation is still usable at this legalization stage).
  bool ZeroUndefOK =
      !LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT);
  if (ZeroUndefOK && DAG.isKnownNeverZero(Op))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, Op);

  return SDValue();
}
7994
7995
3.09k
/// Combine an ISD::CTLZ_ZERO_UNDEF node: constant-fold only.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2 : getNode performs the constant fold.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, Op);

  return SDValue();
}
8004
8005
1.03k
/// Combine an ISD::CTTZ node: constant-fold, and switch to the zero-undef
/// variant when the operand is known never zero.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2 : re-emit through getNode so it constant folds.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, Op);

  // If the value is known never to be zero, switch to the undef version
  // (only when that operation is still usable at this legalization stage).
  bool ZeroUndefOK =
      !LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT);
  if (ZeroUndefOK && DAG.isKnownNeverZero(Op))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, Op);

  return SDValue();
}
8021
8022
1.55k
/// Combine an ISD::CTTZ_ZERO_UNDEF node: constant-fold only.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2 : getNode performs the constant fold.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, Op);

  return SDValue();
}
8031
8032
2.81k
/// Combine an ISD::CTPOP node: constant-fold only.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2 : getNode performs the constant fold.
  if (DAG.isConstantIntBuildVectorOrConstantInt(Op))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, Op);

  return SDValue();
}
8041
8042
// FIXME: This should be checking for no signed zeros on individual operands, as
8043
// well as no nans.
8044
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8045
                                         SDValue RHS,
8046
70.5k
                                         const TargetLowering &TLI) {
8047
70.5k
  const TargetOptions &Options = DAG.getTarget().Options;
8048
70.5k
  EVT VT = LHS.getValueType();
8049
70.5k
8050
70.5k
  return Options.NoSignedZerosFPMath && 
VT.isFloatingPoint()449
&&
8051
70.5k
         
TLI.isProfitableToCombineMinNumMaxNum(VT)445
&&
8052
70.5k
         
DAG.isKnownNeverNaN(LHS)445
&&
DAG.isKnownNeverNaN(RHS)234
;
8053
70.5k
}
8054
8055
/// Generate Min/Max node
8056
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8057
                                   SDValue RHS, SDValue True, SDValue False,
8058
                                   ISD::CondCode CC, const TargetLowering &TLI,
8059
204
                                   SelectionDAG &DAG) {
8060
204
  if (!(LHS == True && 
RHS == False164
) &&
!(40
LHS == False40
&&
RHS == True3
))
8061
37
    return SDValue();
8062
167
8063
167
  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8064
167
  switch (CC) {
8065
167
  case ISD::SETOLT:
8066
89
  case ISD::SETOLE:
8067
89
  case ISD::SETLT:
8068
89
  case ISD::SETLE:
8069
89
  case ISD::SETULT:
8070
89
  case ISD::SETULE: {
8071
89
    // Since it's known never nan to get here already, either fminnum or
8072
89
    // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8073
89
    // expanded in terms of it.
8074
89
    unsigned IEEEOpcode = (LHS == True) ? 
ISD::FMINNUM_IEEE88
:
ISD::FMAXNUM_IEEE1
;
8075
89
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8076
64
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8077
25
8078
25
    unsigned Opcode = (LHS == True) ? 
ISD::FMINNUM24
:
ISD::FMAXNUM1
;
8079
25
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8080
6
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8081
19
    return SDValue();
8082
19
  }
8083
78
  case ISD::SETOGT:
8084
78
  case ISD::SETOGE:
8085
78
  case ISD::SETGT:
8086
78
  case ISD::SETGE:
8087
78
  case ISD::SETUGT:
8088
78
  case ISD::SETUGE: {
8089
78
    unsigned IEEEOpcode = (LHS == True) ? 
ISD::FMAXNUM_IEEE76
:
ISD::FMINNUM_IEEE2
;
8090
78
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8091
60
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8092
18
8093
18
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : 
ISD::FMINNUM0
;
8094
18
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8095
5
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8096
13
    return SDValue();
8097
13
  }
8098
13
  default:
8099
0
    return SDValue();
8100
167
  }
8101
167
}
8102
8103
83.2k
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8104
83.2k
  SDValue Cond = N->getOperand(0);
8105
83.2k
  SDValue N1 = N->getOperand(1);
8106
83.2k
  SDValue N2 = N->getOperand(2);
8107
83.2k
  EVT VT = N->getValueType(0);
8108
83.2k
  EVT CondVT = Cond.getValueType();
8109
83.2k
  SDLoc DL(N);
8110
83.2k
8111
83.2k
  if (!VT.isInteger())
8112
13.0k
    return SDValue();
8113
70.1k
8114
70.1k
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
8115
70.1k
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
8116
70.1k
  if (!C1 || 
!C218.4k
)
8117
59.3k
    return SDValue();
8118
10.8k
8119
10.8k
  // Only do this before legalization to avoid conflicting with target-specific
8120
10.8k
  // transforms in the other direction (create a select from a zext/sext). There
8121
10.8k
  // is also a target-independent combine here in DAGCombiner in the other
8122
10.8k
  // direction for (select Cond, -1, 0) when the condition is not i1.
8123
10.8k
  if (CondVT == MVT::i1 && 
!LegalOperations6.69k
) {
8124
5.02k
    if (C1->isNullValue() && 
C2->isOne()275
) {
8125
54
      // select Cond, 0, 1 --> zext (!Cond)
8126
54
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8127
54
      if (VT != MVT::i1)
8128
54
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8129
54
      return NotCond;
8130
54
    }
8131
4.97k
    if (C1->isNullValue() && 
C2->isAllOnesValue()221
) {
8132
37
      // select Cond, 0, -1 --> sext (!Cond)
8133
37
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8134
37
      if (VT != MVT::i1)
8135
37
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8136
37
      return NotCond;
8137
37
    }
8138
4.93k
    if (C1->isOne() && 
C2->isNullValue()347
) {
8139
105
      // select Cond, 1, 0 --> zext (Cond)
8140
105
      if (VT != MVT::i1)
8141
105
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8142
105
      return Cond;
8143
105
    }
8144
4.83k
    if (C1->isAllOnesValue() && 
C2->isNullValue()444
) {
8145
63
      // select Cond, -1, 0 --> sext (Cond)
8146
63
      if (VT != MVT::i1)
8147
63
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8148
63
      return Cond;
8149
63
    }
8150
4.76k
8151
4.76k
    // For any constants that differ by 1, we can transform the select into an
8152
4.76k
    // extend and add. Use a target hook because some targets may prefer to
8153
4.76k
    // transform in the other direction.
8154
4.76k
    if (TLI.convertSelectOfConstantsToMath(VT)) {
8155
1.58k
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
8156
171
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8157
171
        if (VT != MVT::i1)
8158
171
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8159
171
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8160
171
      }
8161
1.41k
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
8162
194
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8163
194
        if (VT != MVT::i1)
8164
194
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8165
194
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8166
194
      }
8167
4.40k
    }
8168
4.40k
8169
4.40k
    return SDValue();
8170
4.40k
  }
8171
5.81k
8172
5.81k
  // fold (select Cond, 0, 1) -> (xor Cond, 1)
8173
5.81k
  // We can't do this reliably if integer based booleans have different contents
8174
5.81k
  // to floating point based booleans. This is because we can't tell whether we
8175
5.81k
  // have an integer-based boolean or a floating-point-based boolean unless we
8176
5.81k
  // can find the SETCC that produced it and inspect its operands. This is
8177
5.81k
  // fairly easy if C is the SETCC node, but it can potentially be
8178
5.81k
  // undiscoverable (or not reasonably discoverable). For example, it could be
8179
5.81k
  // in another basic block or it could require searching a complicated
8180
5.81k
  // expression.
8181
5.81k
  if (CondVT.isInteger() &&
8182
5.81k
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8183
5.81k
          TargetLowering::ZeroOrOneBooleanContent &&
8184
5.81k
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8185
4.00k
          TargetLowering::ZeroOrOneBooleanContent &&
8186
5.81k
      
C1->isNullValue()4.00k
&&
C2->isOne()535
) {
8187
0
    SDValue NotCond =
8188
0
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8189
0
    if (VT.bitsEq(CondVT))
8190
0
      return NotCond;
8191
0
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
8192
0
  }
8193
5.81k
8194
5.81k
  return SDValue();
8195
5.81k
}
8196
8197
83.3k
SDValue DAGCombiner::visitSELECT(SDNode *N) {
8198
83.3k
  SDValue N0 = N->getOperand(0);
8199
83.3k
  SDValue N1 = N->getOperand(1);
8200
83.3k
  SDValue N2 = N->getOperand(2);
8201
83.3k
  EVT VT = N->getValueType(0);
8202
83.3k
  EVT VT0 = N0.getValueType();
8203
83.3k
  SDLoc DL(N);
8204
83.3k
  SDNodeFlags Flags = N->getFlags();
8205
83.3k
8206
83.3k
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8207
129
    return V;
8208
83.2k
8209
83.2k
  // fold (select X, X, Y) -> (or X, Y)
8210
83.2k
  // fold (select X, 1, Y) -> (or C, Y)
8211
83.2k
  if (VT == VT0 && 
VT == MVT::i19.06k
&&
(91
N0 == N191
||
isOneConstant(N1)87
))
8212
9
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8213
83.2k
8214
83.2k
  if (SDValue V = foldSelectOfConstants(N))
8215
624
    return V;
8216
82.6k
8217
82.6k
  // fold (select C, 0, X) -> (and (not C), X)
8218
82.6k
  if (VT == VT0 && 
VT == MVT::i19.05k
&&
isNullConstant(N1)82
) {
8219
7
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8220
7
    AddToWorklist(NOTNode.getNode());
8221
7
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8222
7
  }
8223
82.6k
  // fold (select C, X, 1) -> (or (not C), X)
8224
82.6k
  if (VT == VT0 && 
VT == MVT::i19.04k
&&
isOneConstant(N2)75
) {
8225
2
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8226
2
    AddToWorklist(NOTNode.getNode());
8227
2
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8228
2
  }
8229
82.6k
  // fold (select X, Y, X) -> (and X, Y)
8230
82.6k
  // fold (select X, Y, 0) -> (and X, Y)
8231
82.6k
  if (VT == VT0 && 
VT == MVT::i19.04k
&&
(73
N0 == N273
||
isNullConstant(N2)71
))
8232
15
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8233
82.5k
8234
82.5k
  // If we can fold this based on the true/false value, do so.
8235
82.5k
  if (SimplifySelectOps(N, N1, N2))
8236
149
    return SDValue(N, 0); // Don't revisit N.
8237
82.4k
8238
82.4k
  if (VT0 == MVT::i1) {
8239
57.5k
    // The code in this block deals with the following 2 equivalences:
8240
57.5k
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8241
57.5k
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8242
57.5k
    // The target can specify its preferred form with the
8243
57.5k
    // shouldNormalizeToSelectSequence() callback. However we always transform
8244
57.5k
    // to the right anyway if we find the inner select exists in the DAG anyway
8245
57.5k
    // and we always transform to the left side if we know that we can further
8246
57.5k
    // optimize the combination of the conditions.
8247
57.5k
    bool normalizeToSequence =
8248
57.5k
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8249
57.5k
    // select (and Cond0, Cond1), X, Y
8250
57.5k
    //   -> select Cond0, (select Cond1, X, Y), Y
8251
57.5k
    if (N0->getOpcode() == ISD::AND && 
N0->hasOneUse()1.51k
) {
8252
871
      SDValue Cond0 = N0->getOperand(0);
8253
871
      SDValue Cond1 = N0->getOperand(1);
8254
871
      SDValue InnerSelect =
8255
871
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8256
871
      if (normalizeToSequence || 
!InnerSelect.use_empty()679
)
8257
192
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8258
192
                           InnerSelect, N2, Flags);
8259
679
      // Cleanup on failure.
8260
679
      if (InnerSelect.use_empty())
8261
679
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8262
679
    }
8263
57.5k
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8264
57.5k
    
if (57.3k
N0->getOpcode() == ISD::OR57.3k
&&
N0->hasOneUse()275
) {
8265
256
      SDValue Cond0 = N0->getOperand(0);
8266
256
      SDValue Cond1 = N0->getOperand(1);
8267
256
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8268
256
                                        Cond1, N1, N2, Flags);
8269
256
      if (normalizeToSequence || 
!InnerSelect.use_empty()146
)
8270
111
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8271
111
                           InnerSelect, Flags);
8272
145
      // Cleanup on failure.
8273
145
      if (InnerSelect.use_empty())
8274
145
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8275
145
    }
8276
57.3k
8277
57.3k
    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8278
57.3k
    
if (57.2k
N1->getOpcode() == ISD::SELECT57.2k
&&
N1->hasOneUse()3.23k
) {
8279
2.37k
      SDValue N1_0 = N1->getOperand(0);
8280
2.37k
      SDValue N1_1 = N1->getOperand(1);
8281
2.37k
      SDValue N1_2 = N1->getOperand(2);
8282
2.37k
      if (N1_2 == N2 && 
N0.getValueType() == N1_0.getValueType()470
) {
8283
469
        // Create the actual and node if we can generate good code for it.
8284
469
        if (!normalizeToSequence) {
8285
160
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8286
160
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8287
160
                             N2, Flags);
8288
160
        }
8289
309
        // Otherwise see if we can optimize the "and" to a better pattern.
8290
309
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8291
11
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8292
11
                             N2, Flags);
8293
11
        }
8294
57.0k
      }
8295
2.37k
    }
8296
57.0k
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8297
57.0k
    if (N2->getOpcode() == ISD::SELECT && 
N2->hasOneUse()3.22k
) {
8298
1.93k
      SDValue N2_0 = N2->getOperand(0);
8299
1.93k
      SDValue N2_1 = N2->getOperand(1);
8300
1.93k
      SDValue N2_2 = N2->getOperand(2);
8301
1.93k
      if (N2_1 == N1 && 
N0.getValueType() == N2_0.getValueType()215
) {
8302
215
        // Create the actual or node if we can generate good code for it.
8303
215
        if (!normalizeToSequence) {
8304
32
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8305
32
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, 
8306
32
                             N2_2, Flags);
8307
32
        }
8308
183
        // Otherwise see if we can optimize to a better pattern.
8309
183
        if (SDValue Combined = visitORLike(N0, N2_0, N))
8310
1
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8311
1
                             N2_2, Flags);
8312
81.9k
      }
8313
1.93k
    }
8314
57.0k
  }
8315
81.9k
8316
81.9k
  // select (not Cond), N1, N2 -> select Cond, N2, N1
8317
81.9k
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8318
196
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8319
196
    SelectOp->setFlags(Flags);
8320
196
    return SelectOp;
8321
196
  }
8322
81.7k
8323
81.7k
  // Fold selects based on a setcc into other things, such as min/max/abs.
8324
81.7k
  if (N0.getOpcode() == ISD::SETCC) {
8325
69.9k
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8326
69.9k
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8327
69.9k
8328
69.9k
    // select (fcmp lt x, y), x, y -> fminnum x, y
8329
69.9k
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
8330
69.9k
    //
8331
69.9k
    // This is OK if we don't care what happens if either operand is a NaN.
8332
69.9k
    if (N0.hasOneUse() && 
isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)56.7k
)
8333
158
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8334
121
                                                CC, TLI, DAG))
8335
121
        return FMinMax;
8336
69.8k
8337
69.8k
    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8338
69.8k
    // This is conservatively limited to pre-legal-operations to give targets
8339
69.8k
    // a chance to reverse the transform if they want to do that. Also, it is
8340
69.8k
    // unlikely that the pattern would be formed late, so it's probably not
8341
69.8k
    // worth going through the other checks.
8342
69.8k
    if (!LegalOperations && 
TLI.isOperationLegalOrCustom(ISD::UADDO, VT)58.1k
&&
8343
69.8k
        
CC == ISD::SETUGT42.1k
&&
N0.hasOneUse()7.19k
&&
isAllOnesConstant(N1)6.34k
&&
8344
69.8k
        
N2.getOpcode() == ISD::ADD103
&&
Cond0 == N2.getOperand(0)92
) {
8345
28
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8346
28
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8347
28
      if (C && 
NotC10
&&
C->getAPIntValue() == ~NotC->getAPIntValue()10
) {
8348
10
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8349
10
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8350
10
        //
8351
10
        // The IR equivalent of this transform would have this form:
8352
10
        //   %a = add %x, C
8353
10
        //   %c = icmp ugt %x, ~C
8354
10
        //   %r = select %c, -1, %a
8355
10
        //   =>
8356
10
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8357
10
        //   %u0 = extractvalue %u, 0
8358
10
        //   %u1 = extractvalue %u, 1
8359
10
        //   %r = select %u1, -1, %u0
8360
10
        SDVTList VTs = DAG.getVTList(VT, VT0);
8361
10
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8362
10
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8363
10
      }
8364
69.8k
    }
8365
69.8k
8366
69.8k
    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8367
69.8k
        
(69.4k
!LegalOperations69.4k
&&
8368
69.4k
         
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)57.7k
)) {
8369
26.6k
      // Any flags available in a select/setcc fold will be on the setcc as they
8370
26.6k
      // migrated from fcmp
8371
26.6k
      Flags = N0.getNode()->getFlags();
8372
26.6k
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8373
26.6k
                                       N2, N0.getOperand(2));
8374
26.6k
      SelectNode->setFlags(Flags);
8375
26.6k
      return SelectNode;
8376
26.6k
    }
8377
43.1k
8378
43.1k
    return SimplifySelect(DL, N0, N1, N2);
8379
43.1k
  }
8380
11.7k
8381
11.7k
  return SDValue();
8382
11.7k
}
8383
8384
static
8385
36
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
8386
36
  SDLoc DL(N);
8387
36
  EVT LoVT, HiVT;
8388
36
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
8389
36
8390
36
  // Split the inputs.
8391
36
  SDValue Lo, Hi, LL, LH, RL, RH;
8392
36
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
8393
36
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
8394
36
8395
36
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
8396
36
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
8397
36
8398
36
  return std::make_pair(Lo, Hi);
8399
36
}
8400
8401
// This function assumes all the vselect's arguments are CONCAT_VECTOR
8402
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8403
45
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8404
45
  SDLoc DL(N);
8405
45
  SDValue Cond = N->getOperand(0);
8406
45
  SDValue LHS = N->getOperand(1);
8407
45
  SDValue RHS = N->getOperand(2);
8408
45
  EVT VT = N->getValueType(0);
8409
45
  int NumElems = VT.getVectorNumElements();
8410
45
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8411
45
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8412
45
         Cond.getOpcode() == ISD::BUILD_VECTOR);
8413
45
8414
45
  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8415
45
  // binary ones here.
8416
45
  if (LHS->getNumOperands() != 2 || 
RHS->getNumOperands() != 234
)
8417
11
    return SDValue();
8418
34
8419
34
  // We're sure we have an even number of elements due to the
8420
34
  // concat_vectors we have as arguments to vselect.
8421
34
  // Skip BV elements until we find one that's not an UNDEF
8422
34
  // After we find an UNDEF element, keep looping until we get to half the
8423
34
  // length of the BV and see if all the non-undef nodes are the same.
8424
34
  ConstantSDNode *BottomHalf = nullptr;
8425
116
  for (int i = 0; i < NumElems / 2; 
++i82
) {
8426
101
    if (Cond->getOperand(i)->isUndef())
8427
0
      continue;
8428
101
8429
101
    if (BottomHalf == nullptr)
8430
34
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8431
67
    else if (Cond->getOperand(i).getNode() != BottomHalf)
8432
19
      return SDValue();
8433
101
  }
8434
34
8435
34
  // Do the same for the second half of the BuildVector
8436
34
  ConstantSDNode *TopHalf = nullptr;
8437
66
  for (int i = NumElems / 2; i < NumElems; 
++i51
) {
8438
54
    if (Cond->getOperand(i)->isUndef())
8439
0
      continue;
8440
54
8441
54
    if (TopHalf == nullptr)
8442
15
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8443
39
    else if (Cond->getOperand(i).getNode() != TopHalf)
8444
3
      return SDValue();
8445
54
  }
8446
15
8447
15
  assert(TopHalf && BottomHalf &&
8448
12
         "One half of the selector was all UNDEFs and the other was all the "
8449
12
         "same value. This should have been addressed before this function.");
8450
12
  return DAG.getNode(
8451
12
      ISD::CONCAT_VECTORS, DL, VT,
8452
12
      BottomHalf->isNullValue() ? 
RHS->getOperand(0)8
:
LHS->getOperand(0)4
,
8453
12
      TopHalf->isNullValue() ? 
RHS->getOperand(1)4
:
LHS->getOperand(1)8
);
8454
15
}
8455
8456
293
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8457
293
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8458
293
  SDValue Mask = MSC->getMask();
8459
293
  SDValue Data = MSC->getValue();
8460
293
  SDValue Chain = MSC->getChain();
8461
293
  SDLoc DL(N);
8462
293
8463
293
  // Zap scatters with a zero mask.
8464
293
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8465
6
    return Chain;
8466
287
8467
287
  if (Level >= AfterLegalizeTypes)
8468
166
    return SDValue();
8469
121
8470
121
  // If the MSCATTER data type requires splitting and the mask is provided by a
8471
121
  // SETCC, then split both nodes and its operands before legalization. This
8472
121
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
8473
121
  // and enables future optimizations (e.g. min/max pattern matching on X86).
8474
121
  if (Mask.getOpcode() != ISD::SETCC)
8475
96
    return SDValue();
8476
25
8477
25
  // Check if any splitting is required.
8478
25
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
8479
25
      TargetLowering::TypeSplitVector)
8480
20
    return SDValue();
8481
5
  SDValue MaskLo, MaskHi;
8482
5
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8483
5
8484
5
  EVT LoVT, HiVT;
8485
5
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
8486
5
8487
5
  EVT MemoryVT = MSC->getMemoryVT();
8488
5
  unsigned Alignment = MSC->getOriginalAlignment();
8489
5
8490
5
  EVT LoMemVT, HiMemVT;
8491
5
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8492
5
8493
5
  SDValue DataLo, DataHi;
8494
5
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8495
5
8496
5
  SDValue Scale = MSC->getScale();
8497
5
  SDValue BasePtr = MSC->getBasePtr();
8498
5
  SDValue IndexLo, IndexHi;
8499
5
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
8500
5
8501
5
  MachineMemOperand *MMO = DAG.getMachineFunction().
8502
5
    getMachineMemOperand(MSC->getPointerInfo(),
8503
5
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8504
5
                          Alignment, MSC->getAAInfo(), MSC->getRanges());
8505
5
8506
5
  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
8507
5
  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
8508
5
                                    DataLo.getValueType(), DL, OpsLo, MMO);
8509
5
8510
5
  // The order of the Scatter operation after split is well defined. The "Hi"
8511
5
  // part comes after the "Lo". So these two operations should be chained one
8512
5
  // after another.
8513
5
  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
8514
5
  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
8515
5
                              DL, OpsHi, MMO);
8516
5
}
8517
8518
1.69k
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8519
1.69k
  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8520
1.69k
  SDValue Mask = MST->getMask();
8521
1.69k
  SDValue Data = MST->getValue();
8522
1.69k
  SDValue Chain = MST->getChain();
8523
1.69k
  EVT VT = Data.getValueType();
8524
1.69k
  SDLoc DL(N);
8525
1.69k
8526
1.69k
  // Zap masked stores with a zero mask.
8527
1.69k
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8528
5
    return Chain;
8529
1.69k
8530
1.69k
  if (Level >= AfterLegalizeTypes)
8531
1.00k
    return SDValue();
8532
686
8533
686
  // If the MSTORE data type requires splitting and the mask is provided by a
8534
686
  // SETCC, then split both nodes and its operands before legalization. This
8535
686
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
8536
686
  // and enables future optimizations (e.g. min/max pattern matching on X86).
8537
686
  if (Mask.getOpcode() == ISD::SETCC) {
8538
365
    // Check if any splitting is required.
8539
365
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8540
365
        TargetLowering::TypeSplitVector)
8541
358
      return SDValue();
8542
7
8543
7
    SDValue MaskLo, MaskHi, Lo, Hi;
8544
7
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8545
7
8546
7
    SDValue Ptr   = MST->getBasePtr();
8547
7
8548
7
    EVT MemoryVT = MST->getMemoryVT();
8549
7
    unsigned Alignment = MST->getOriginalAlignment();
8550
7
8551
7
    EVT LoMemVT, HiMemVT;
8552
7
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8553
7
8554
7
    SDValue DataLo, DataHi;
8555
7
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8556
7
8557
7
    MachineMemOperand *MMO = DAG.getMachineFunction().
8558
7
      getMachineMemOperand(MST->getPointerInfo(),
8559
7
                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8560
7
                           Alignment, MST->getAAInfo(), MST->getRanges());
8561
7
8562
7
    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8563
7
                            MST->isTruncatingStore(),
8564
7
                            MST->isCompressingStore());
8565
7
8566
7
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8567
7
                                     MST->isCompressingStore());
8568
7
    unsigned HiOffset = LoMemVT.getStoreSize();
8569
7
8570
7
    MMO = DAG.getMachineFunction().getMachineMemOperand(
8571
7
        MST->getPointerInfo().getWithOffset(HiOffset),
8572
7
        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
8573
7
        MST->getAAInfo(), MST->getRanges());
8574
7
8575
7
    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8576
7
                            MST->isTruncatingStore(),
8577
7
                            MST->isCompressingStore());
8578
7
8579
7
    AddToWorklist(Lo.getNode());
8580
7
    AddToWorklist(Hi.getNode());
8581
7
8582
7
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8583
7
  }
8584
321
  return SDValue();
8585
321
}
8586
8587
998
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8588
998
  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8589
998
  SDValue Mask = MGT->getMask();
8590
998
  SDLoc DL(N);
8591
998
8592
998
  // Zap gathers with a zero mask.
8593
998
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8594
4
    return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8595
994
8596
994
  if (Level >= AfterLegalizeTypes)
8597
499
    return SDValue();
8598
495
8599
495
  // If the MGATHER result requires splitting and the mask is provided by a
8600
495
  // SETCC, then split both nodes and its operands before legalization. This
8601
495
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
8602
495
  // and enables future optimizations (e.g. min/max pattern matching on X86).
8603
495
8604
495
  if (Mask.getOpcode() != ISD::SETCC)
8605
462
    return SDValue();
8606
33
8607
33
  EVT VT = N->getValueType(0);
8608
33
8609
33
  // Check if any splitting is required.
8610
33
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8611
33
      TargetLowering::TypeSplitVector)
8612
27
    return SDValue();
8613
6
8614
6
  SDValue MaskLo, MaskHi, Lo, Hi;
8615
6
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8616
6
8617
6
  SDValue PassThru = MGT->getPassThru();
8618
6
  SDValue PassThruLo, PassThruHi;
8619
6
  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8620
6
8621
6
  EVT LoVT, HiVT;
8622
6
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8623
6
8624
6
  SDValue Chain = MGT->getChain();
8625
6
  EVT MemoryVT = MGT->getMemoryVT();
8626
6
  unsigned Alignment = MGT->getOriginalAlignment();
8627
6
8628
6
  EVT LoMemVT, HiMemVT;
8629
6
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8630
6
8631
6
  SDValue Scale = MGT->getScale();
8632
6
  SDValue BasePtr = MGT->getBasePtr();
8633
6
  SDValue Index = MGT->getIndex();
8634
6
  SDValue IndexLo, IndexHi;
8635
6
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8636
6
8637
6
  MachineMemOperand *MMO = DAG.getMachineFunction().
8638
6
    getMachineMemOperand(MGT->getPointerInfo(),
8639
6
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8640
6
                          Alignment, MGT->getAAInfo(), MGT->getRanges());
8641
6
8642
6
  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8643
6
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8644
6
                           MMO);
8645
6
8646
6
  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8647
6
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8648
6
                           MMO);
8649
6
8650
6
  AddToWorklist(Lo.getNode());
8651
6
  AddToWorklist(Hi.getNode());
8652
6
8653
6
  // Build a factor node to remember that this load is independent of the
8654
6
  // other one.
8655
6
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8656
6
                      Hi.getValue(1));
8657
6
8658
6
  // Legalized the chain result - switch anything that used the old chain to
8659
6
  // use the new one.
8660
6
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8661
6
8662
6
  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8663
6
8664
6
  SDValue RetOps[] = { GatherRes, Chain };
8665
6
  return DAG.getMergeValues(RetOps, DL);
8666
6
}
8667
8668
1.68k
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8669
1.68k
  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8670
1.68k
  SDValue Mask = MLD->getMask();
8671
1.68k
  SDLoc DL(N);
8672
1.68k
8673
1.68k
  // Zap masked loads with a zero mask.
8674
1.68k
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8675
7
    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8676
1.67k
8677
1.67k
  if (Level >= AfterLegalizeTypes)
8678
945
    return SDValue();
8679
732
8680
732
  // If the MLOAD result requires splitting and the mask is provided by a
8681
732
  // SETCC, then split both nodes and its operands before legalization. This
8682
732
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
8683
732
  // and enables future optimizations (e.g. min/max pattern matching on X86).
8684
732
  if (Mask.getOpcode() == ISD::SETCC) {
8685
163
    EVT VT = N->getValueType(0);
8686
163
8687
163
    // Check if any splitting is required.
8688
163
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8689
163
        TargetLowering::TypeSplitVector)
8690
145
      return SDValue();
8691
18
8692
18
    SDValue MaskLo, MaskHi, Lo, Hi;
8693
18
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8694
18
8695
18
    SDValue PassThru = MLD->getPassThru();
8696
18
    SDValue PassThruLo, PassThruHi;
8697
18
    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8698
18
8699
18
    EVT LoVT, HiVT;
8700
18
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8701
18
8702
18
    SDValue Chain = MLD->getChain();
8703
18
    SDValue Ptr   = MLD->getBasePtr();
8704
18
    EVT MemoryVT = MLD->getMemoryVT();
8705
18
    unsigned Alignment = MLD->getOriginalAlignment();
8706
18
8707
18
    EVT LoMemVT, HiMemVT;
8708
18
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8709
18
8710
18
    MachineMemOperand *MMO = DAG.getMachineFunction().
8711
18
    getMachineMemOperand(MLD->getPointerInfo(),
8712
18
                         MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8713
18
                         Alignment, MLD->getAAInfo(), MLD->getRanges());
8714
18
8715
18
    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8716
18
                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8717
18
8718
18
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8719
18
                                     MLD->isExpandingLoad());
8720
18
    unsigned HiOffset = LoMemVT.getStoreSize();
8721
18
8722
18
    MMO = DAG.getMachineFunction().getMachineMemOperand(
8723
18
        MLD->getPointerInfo().getWithOffset(HiOffset),
8724
18
        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
8725
18
        MLD->getAAInfo(), MLD->getRanges());
8726
18
8727
18
    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8728
18
                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8729
18
8730
18
    AddToWorklist(Lo.getNode());
8731
18
    AddToWorklist(Hi.getNode());
8732
18
8733
18
    // Build a factor node to remember that this load is independent of the
8734
18
    // other one.
8735
18
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8736
18
                        Hi.getValue(1));
8737
18
8738
18
    // Legalized the chain result - switch anything that used the old chain to
8739
18
    // use the new one.
8740
18
    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8741
18
8742
18
    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8743
18
8744
18
    SDValue RetOps[] = { LoadRes, Chain };
8745
18
    return DAG.getMergeValues(RetOps, DL);
8746
18
  }
8747
569
  return SDValue();
8748
569
}
8749
8750
/// A vector select of 2 constant vectors can be simplified to math/logic to
8751
/// avoid a variable select instruction and possibly avoid constant loads.
8752
47.1k
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8753
47.1k
  SDValue Cond = N->getOperand(0);
8754
47.1k
  SDValue N1 = N->getOperand(1);
8755
47.1k
  SDValue N2 = N->getOperand(2);
8756
47.1k
  EVT VT = N->getValueType(0);
8757
47.1k
  if (!Cond.hasOneUse() || 
Cond.getScalarValueSizeInBits() != 139.8k
||
8758
47.1k
      
!TLI.convertSelectOfConstantsToMath(VT)28.7k
||
8759
47.1k
      
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode())2.23k
||
8760
47.1k
      
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode())687
)
8761
46.8k
    return SDValue();
8762
280
8763
280
  // Check if we can use the condition value to increment/decrement a single
8764
280
  // constant value. This simplifies a select to an add and removes a constant
8765
280
  // load/materialization from the general case.
8766
280
  bool AllAddOne = true;
8767
280
  bool AllSubOne = true;
8768
280
  unsigned Elts = VT.getVectorNumElements();
8769
3.21k
  for (unsigned i = 0; i != Elts; 
++i2.93k
) {
8770
2.93k
    SDValue N1Elt = N1.getOperand(i);
8771
2.93k
    SDValue N2Elt = N2.getOperand(i);
8772
2.93k
    if (N1Elt.isUndef() || 
N2Elt.isUndef()2.90k
)
8773
28
      continue;
8774
2.90k
8775
2.90k
    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8776
2.90k
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8777
2.90k
    if (C1 != C2 + 1)
8778
2.81k
      AllAddOne = false;
8779
2.90k
    if (C1 != C2 - 1)
8780
2.76k
      AllSubOne = false;
8781
2.90k
  }
8782
280
8783
280
  // Further simplifications for the extra-special cases where the constants are
8784
280
  // all 0 or all -1 should be implemented as folds of these patterns.
8785
280
  SDLoc DL(N);
8786
280
  if (AllAddOne || 
AllSubOne262
) {
8787
40
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8788
40
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8789
40
    auto ExtendOpcode = AllAddOne ? 
ISD::ZERO_EXTEND18
:
ISD::SIGN_EXTEND22
;
8790
40
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8791
40
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8792
40
  }
8793
240
8794
240
  // The general case for select-of-constants:
8795
240
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8796
240
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8797
240
  // leave that to a machine-specific pass.
8798
240
  return SDValue();
8799
240
}
8800
8801
48.1k
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8802
48.1k
  SDValue N0 = N->getOperand(0);
8803
48.1k
  SDValue N1 = N->getOperand(1);
8804
48.1k
  SDValue N2 = N->getOperand(2);
8805
48.1k
  EVT VT = N->getValueType(0);
8806
48.1k
  SDLoc DL(N);
8807
48.1k
8808
48.1k
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8809
43
    return V;
8810
48.1k
8811
48.1k
  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8812
48.1k
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8813
44
    return DAG.getSelect(DL, VT, F, N2, N1);
8814
48.0k
8815
48.0k
  // Canonicalize integer abs.
8816
48.0k
  // vselect (setg[te] X,  0),  X, -X ->
8817
48.0k
  // vselect (setgt    X, -1),  X, -X ->
8818
48.0k
  // vselect (setl[te] X,  0), -X,  X ->
8819
48.0k
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8820
48.0k
  if (N0.getOpcode() == ISD::SETCC) {
8821
13.9k
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8822
13.9k
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8823
13.9k
    bool isAbs = false;
8824
13.9k
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8825
13.9k
8826
13.9k
    if (((RHSIsAllZeros && 
(6.51k
CC == ISD::SETGT6.51k
||
CC == ISD::SETGE6.43k
)) ||
8827
13.9k
         
(13.8k
ISD::isBuildVectorAllOnes(RHS.getNode())13.8k
&&
CC == ISD::SETGT180
)) &&
8828
13.9k
        
N1 == LHS89
&&
N2.getOpcode() == ISD::SUB40
&&
N1 == N2.getOperand(1)0
)
8829
0
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8830
13.9k
    else if ((RHSIsAllZeros && 
(6.51k
CC == ISD::SETLT6.51k
||
CC == ISD::SETLE6.28k
)) &&
8831
13.9k
             
N2 == LHS276
&&
N1.getOpcode() == ISD::SUB43
&&
N2 == N1.getOperand(1)43
)
8832
43
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8833
13.9k
8834
13.9k
    if (isAbs) {
8835
43
      EVT VT = LHS.getValueType();
8836
43
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8837
21
        return DAG.getNode(ISD::ABS, DL, VT, LHS);
8838
22
8839
22
      SDValue Shift = DAG.getNode(
8840
22
          ISD::SRA, DL, VT, LHS,
8841
22
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8842
22
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8843
22
      AddToWorklist(Shift.getNode());
8844
22
      AddToWorklist(Add.getNode());
8845
22
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8846
22
    }
8847
13.8k
8848
13.8k
    // vselect x, y (fcmp lt x, y) -> fminnum x, y
8849
13.8k
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8850
13.8k
    //
8851
13.8k
    // This is OK if we don't care about what happens if either operand is a
8852
13.8k
    // NaN.
8853
13.8k
    //
8854
13.8k
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8855
13.7k
                                                       N0.getOperand(1), TLI)) {
8856
46
      if (SDValue FMinMax = combineMinNumMaxNum(
8857
14
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8858
14
        return FMinMax;
8859
13.8k
    }
8860
13.8k
8861
13.8k
    // If this select has a condition (setcc) with narrower operands than the
8862
13.8k
    // select, try to widen the compare to match the select width.
8863
13.8k
    // TODO: This should be extended to handle any constant.
8864
13.8k
    // TODO: This could be extended to handle non-loading patterns, but that
8865
13.8k
    //       requires thorough testing to avoid regressions.
8866
13.8k
    if (isNullOrNullSplat(RHS)) {
8867
4.06k
      EVT NarrowVT = LHS.getValueType();
8868
4.06k
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8869
4.06k
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8870
4.06k
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8871
4.06k
      unsigned WideWidth = WideVT.getScalarSizeInBits();
8872
4.06k
      bool IsSigned = isSignedIntSetCC(CC);
8873
4.06k
      auto LoadExtOpcode = IsSigned ? 
ISD::SEXTLOAD305
:
ISD::ZEXTLOAD3.75k
;
8874
4.06k
      if (LHS.getOpcode() == ISD::LOAD && 
LHS.hasOneUse()189
&&
8875
4.06k
          
SetCCWidth != 198
&&
SetCCWidth < WideWidth59
&&
8876
4.06k
          
TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT)32
&&
8877
4.06k
          
TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)16
) {
8878
15
        // Both compare operands can be widened for free. The LHS can use an
8879
15
        // extended load, and the RHS is a constant:
8880
15
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
8881
15
        //   vselect (setcc extload(X), C'), N1, N2
8882
15
        auto ExtOpcode = IsSigned ? 
ISD::SIGN_EXTEND4
:
ISD::ZERO_EXTEND11
;
8883
15
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8884
15
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8885
15
        EVT WideSetCCVT = getSetCCResultType(WideVT);
8886
15
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8887
15
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8888
15
      }
8889
48.0k
    }
8890
13.8k
  }
8891
48.0k
8892
48.0k
  if (SimplifySelectOps(N, N1, N2))
8893
6
    return SDValue(N, 0);  // Don't revisit N.
8894
48.0k
8895
48.0k
  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8896
48.0k
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
8897
91
    return N1;
8898
47.9k
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8899
47.9k
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
8900
792
    return N2;
8901
47.1k
8902
47.1k
  // The ConvertSelectToConcatVector function is assuming both the above
8903
47.1k
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8904
47.1k
  // and addressed.
8905
47.1k
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8906
47.1k
      
N2.getOpcode() == ISD::CONCAT_VECTORS697
&&
8907
47.1k
      
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())190
) {
8908
45
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8909
12
      return CV;
8910
47.1k
  }
8911
47.1k
8912
47.1k
  if (SDValue V = foldVSelectOfConstants(N))
8913
40
    return V;
8914
47.0k
8915
47.0k
  return SDValue();
8916
47.0k
}
8917
8918
55.2k
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8919
55.2k
  SDValue N0 = N->getOperand(0);
8920
55.2k
  SDValue N1 = N->getOperand(1);
8921
55.2k
  SDValue N2 = N->getOperand(2);
8922
55.2k
  SDValue N3 = N->getOperand(3);
8923
55.2k
  SDValue N4 = N->getOperand(4);
8924
55.2k
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8925
55.2k
8926
55.2k
  // fold select_cc lhs, rhs, x, x, cc -> x
8927
55.2k
  if (N2 == N3)
8928
27
    return N2;
8929
55.2k
8930
55.2k
  // Determine if the condition we're dealing with is constant
8931
55.2k
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8932
2.80k
                                  CC, SDLoc(N), false)) {
8933
2.80k
    AddToWorklist(SCC.getNode());
8934
2.80k
8935
2.80k
    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8936
31
      if (!SCCC->isNullValue())
8937
17
        return N2;    // cond always true -> true val
8938
14
      else
8939
14
        return N3;    // cond always false -> false val
8940
2.77k
    } else if (SCC->isUndef()) {
8941
0
      // When the condition is UNDEF, just return the first operand. This is
8942
0
      // coherent the DAG creation, no setcc node is created in this case
8943
0
      return N2;
8944
2.77k
    } else if (SCC.getOpcode() == ISD::SETCC) {
8945
2.71k
      // Fold to a simpler select_cc
8946
2.71k
      SDValue SelectOp = DAG.getNode(
8947
2.71k
          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8948
2.71k
          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8949
2.71k
      SelectOp->setFlags(SCC->getFlags());
8950
2.71k
      return SelectOp;
8951
2.71k
    }
8952
52.4k
  }
8953
52.4k
8954
52.4k
  // If we can fold this based on the true/false value, do so.
8955
52.4k
  if (SimplifySelectOps(N, N2, N3))
8956
0
    return SDValue(N, 0);  // Don't revisit N.
8957
52.4k
8958
52.4k
  // fold select_cc into other things, such as min/max/abs
8959
52.4k
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8960
52.4k
}
8961
8962
654k
SDValue DAGCombiner::visitSETCC(SDNode *N) {
8963
654k
  // setcc is very commonly used as an argument to brcond. This pattern
8964
654k
  // also lend itself to numerous combines and, as a result, it is desired
8965
654k
  // we keep the argument to a brcond as a setcc as much as possible.
8966
654k
  bool PreferSetCC =
8967
654k
      N->hasOneUse() && 
N->use_begin()->getOpcode() == ISD::BRCOND646k
;
8968
654k
8969
654k
  SDValue Combined = SimplifySetCC(
8970
654k
      N->getValueType(0), N->getOperand(0), N->getOperand(1),
8971
654k
      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8972
654k
8973
654k
  if (!Combined)
8974
614k
    return SDValue();
8975
40.8k
8976
40.8k
  // If we prefer to have a setcc, and we don't, we'll try our best to
8977
40.8k
  // recreate one using rebuildSetCC.
8978
40.8k
  if (PreferSetCC && 
Combined.getOpcode() != ISD::SETCC35.7k
) {
8979
1.46k
    SDValue NewSetCC = rebuildSetCC(Combined);
8980
1.46k
8981
1.46k
    // We don't have anything interesting to combine to.
8982
1.46k
    if (NewSetCC.getNode() == N)
8983
485
      return SDValue();
8984
982
8985
982
    if (NewSetCC)
8986
0
      return NewSetCC;
8987
40.3k
  }
8988
40.3k
8989
40.3k
  return Combined;
8990
40.3k
}
8991
8992
2.14k
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8993
2.14k
  SDValue LHS = N->getOperand(0);
8994
2.14k
  SDValue RHS = N->getOperand(1);
8995
2.14k
  SDValue Carry = N->getOperand(2);
8996
2.14k
  SDValue Cond = N->getOperand(3);
8997
2.14k
8998
2.14k
  // If Carry is false, fold to a regular SETCC.
8999
2.14k
  if (isNullConstant(Carry))
9000
222
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9001
1.91k
9002
1.91k
  return SDValue();
9003
1.91k
}
9004
9005
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9006
/// a build_vector of constants.
9007
/// This function is called by the DAGCombiner when visiting sext/zext/aext
9008
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9009
/// Vector extends are not folded if operations are legal; this is to
9010
/// avoid introducing illegal build_vector dag nodes.
9011
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9012
458k
                                         SelectionDAG &DAG, bool LegalTypes) {
9013
458k
  unsigned Opcode = N->getOpcode();
9014
458k
  SDValue N0 = N->getOperand(0);
9015
458k
  EVT VT = N->getValueType(0);
9016
458k
  SDLoc DL(N);
9017
458k
9018
458k
  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9019
458k
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9020
458k
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9021
458k
         && "Expected EXTEND dag node in input!");
9022
458k
9023
458k
  // fold (sext c1) -> c1
9024
458k
  // fold (zext c1) -> c1
9025
458k
  // fold (aext c1) -> c1
9026
458k
  if (isa<ConstantSDNode>(N0))
9027
644
    return DAG.getNode(Opcode, DL, VT, N0);
9028
458k
9029
458k
  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9030
458k
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9031
458k
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9032
458k
  if (N0->getOpcode() == ISD::SELECT) {
9033
8.88k
    SDValue Op1 = N0->getOperand(1);
9034
8.88k
    SDValue Op2 = N0->getOperand(2);
9035
8.88k
    if (isa<ConstantSDNode>(Op1) && 
isa<ConstantSDNode>(Op2)419
&&
9036
8.88k
        
(256
Opcode != ISD::ZERO_EXTEND256
||
!TLI.isZExtFree(N0.getValueType(), VT)90
)) {
9037
241
      // For any_extend, choose sign extension of the constants to allow a
9038
241
      // possible further transform to sign_extend_inreg.i.e.
9039
241
      //
9040
241
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9041
241
      // t2: i64 = any_extend t1
9042
241
      // -->
9043
241
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9044
241
      // -->
9045
241
      // t4: i64 = sign_extend_inreg t3
9046
241
      unsigned FoldOpc = Opcode;
9047
241
      if (FoldOpc == ISD::ANY_EXTEND)
9048
136
        FoldOpc = ISD::SIGN_EXTEND;
9049
241
      return DAG.getSelect(DL, VT, N0->getOperand(0),
9050
241
                           DAG.getNode(FoldOpc, DL, VT, Op1),
9051
241
                           DAG.getNode(FoldOpc, DL, VT, Op2));
9052
241
    }
9053
457k
  }
9054
457k
9055
457k
  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9056
457k
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9057
457k
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9058
457k
  EVT SVT = VT.getScalarType();
9059
457k
  if (!(VT.isVector() && 
(107k
!LegalTypes107k
||
TLI.isTypeLegal(SVT)82.5k
) &&
9060
457k
      
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())105k
))
9061
457k
    return SDValue();
9062
84
9063
84
  // We can fold this node into a build_vector.
9064
84
  unsigned VTBits = SVT.getSizeInBits();
9065
84
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9066
84
  SmallVector<SDValue, 8> Elts;
9067
84
  unsigned NumElts = VT.getVectorNumElements();
9068
84
9069
84
  // For zero-extensions, UNDEF elements still guarantee to have the upper
9070
84
  // bits set to zero.
9071
84
  bool IsZext =
9072
84
      Opcode == ISD::ZERO_EXTEND || 
Opcode == ISD::ZERO_EXTEND_VECTOR_INREG81
;
9073
84
9074
513
  for (unsigned i = 0; i != NumElts; 
++i429
) {
9075
429
    SDValue Op = N0.getOperand(i);
9076
429
    if (Op.isUndef()) {
9077
54
      Elts.push_back(IsZext ? 
DAG.getConstant(0, DL, SVT)26
:
DAG.getUNDEF(SVT)28
);
9078
54
      continue;
9079
54
    }
9080
375
9081
375
    SDLoc DL(Op);
9082
375
    // Get the constant value and if needed trunc it to the size of the type.
9083
375
    // Nodes like build_vector might have constants wider than the scalar type.
9084
375
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9085
375
    if (Opcode == ISD::SIGN_EXTEND || 
Opcode == ISD::SIGN_EXTEND_VECTOR_INREG327
)
9086
132
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9087
243
    else
9088
243
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9089
375
  }
9090
84
9091
84
  return DAG.getBuildVector(VT, DL, Elts);
9092
84
}
9093
9094
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9095
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9096
// transformation. Returns true if extension are possible and the above
9097
// mentioned transformation is profitable.
9098
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9099
                                    unsigned ExtOpc,
9100
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
9101
23.6k
                                    const TargetLowering &TLI) {
9102
23.6k
  bool HasCopyToRegUses = false;
9103
23.6k
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9104
23.6k
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9105
23.6k
                            UE = N0.getNode()->use_end();
9106
73.5k
       UI != UE; 
++UI49.8k
) {
9107
51.5k
    SDNode *User = *UI;
9108
51.5k
    if (User == N)
9109
22.4k
      continue;
9110
29.1k
    if (UI.getUse().getResNo() != N0.getResNo())
9111
4.53k
      continue;
9112
24.6k
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9113
24.6k
    if (ExtOpc != ISD::ANY_EXTEND && 
User->getOpcode() == ISD::SETCC24.2k
) {
9114
9.48k
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9115
9.48k
      if (ExtOpc == ISD::ZERO_EXTEND && 
ISD::isSignedIntSetCC(CC)5.09k
)
9116
153
        // Sign bits will be lost after a zext.
9117
153
        return false;
9118
9.33k
      bool Add = false;
9119
27.1k
      for (unsigned i = 0; i != 2; 
++i17.7k
) {
9120
18.4k
        SDValue UseOp = User->getOperand(i);
9121
18.4k
        if (UseOp == N0)
9122
9.09k
          continue;
9123
9.33k
        if (!isa<ConstantSDNode>(UseOp))
9124
652
          return false;
9125
8.68k
        Add = true;
9126
8.68k
      }
9127
9.33k
      
if (8.68k
Add8.68k
)
9128
8.68k
        ExtendNodes.push_back(User);
9129
8.68k
      continue;
9130
15.1k
    }
9131
15.1k
    // If truncates aren't free and there are users we can't
9132
15.1k
    // extend, it isn't worthwhile.
9133
15.1k
    if (!isTruncFree)
9134
896
      return false;
9135
14.2k
    // Remember if this value is live-out.
9136
14.2k
    if (User->getOpcode() == ISD::CopyToReg)
9137
3.53k
      HasCopyToRegUses = true;
9138
14.2k
  }
9139
23.6k
9140
23.6k
  
if (21.9k
HasCopyToRegUses21.9k
) {
9141
3.52k
    bool BothLiveOut = false;
9142
3.52k
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9143
5.93k
         UI != UE; 
++UI2.40k
) {
9144
3.90k
      SDUse &Use = UI.getUse();
9145
3.90k
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9146
1.49k
        BothLiveOut = true;
9147
1.49k
        break;
9148
1.49k
      }
9149
3.90k
    }
9150
3.52k
    if (BothLiveOut)
9151
1.49k
      // Both unextended and extended values are live out. There had better be
9152
1.49k
      // a good reason for the transformation.
9153
1.49k
      return ExtendNodes.size();
9154
20.4k
  }
9155
20.4k
  return true;
9156
20.4k
}
9157
9158
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9159
                                  SDValue OrigLoad, SDValue ExtLoad,
9160
81.2k
                                  ISD::NodeType ExtType) {
9161
81.2k
  // Extend SetCC uses if necessary.
9162
81.2k
  SDLoc DL(ExtLoad);
9163
81.2k
  for (SDNode *SetCC : SetCCs) {
9164
8.65k
    SmallVector<SDValue, 4> Ops;
9165
8.65k
9166
25.9k
    for (unsigned j = 0; j != 2; 
++j17.3k
) {
9167
17.3k
      SDValue SOp = SetCC->getOperand(j);
9168
17.3k
      if (SOp == OrigLoad)
9169
8.65k
        Ops.push_back(ExtLoad);
9170
8.65k
      else
9171
8.65k
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9172
17.3k
    }
9173
8.65k
9174
8.65k
    Ops.push_back(SetCC->getOperand(2));
9175
8.65k
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9176
8.65k
  }
9177
81.2k
}
9178
9179
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9180
247k
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9181
247k
  SDValue N0 = N->getOperand(0);
9182
247k
  EVT DstVT = N->getValueType(0);
9183
247k
  EVT SrcVT = N0.getValueType();
9184
247k
9185
247k
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9186
247k
          N->getOpcode() == ISD::ZERO_EXTEND) &&
9187
247k
         "Unexpected node type (not an extend)!");
9188
247k
9189
247k
  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9190
247k
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
9191
247k
  //   (v8i32 (sext (v8i16 (load x))))
9192
247k
  // into:
9193
247k
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
9194
247k
  //                          (v4i32 (sextload (x + 16)))))
9195
247k
  // Where uses of the original load, i.e.:
9196
247k
  //   (v8i16 (load x))
9197
247k
  // are replaced with:
9198
247k
  //   (v8i16 (truncate
9199
247k
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
9200
247k
  //                            (v4i32 (sextload (x + 16)))))))
9201
247k
  //
9202
247k
  // This combine is only applicable to illegal, but splittable, vectors.
9203
247k
  // All legal types, and illegal non-vector types, are handled elsewhere.
9204
247k
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9205
247k
  //
9206
247k
  if (N0->getOpcode() != ISD::LOAD)
9207
236k
    return SDValue();
9208
10.9k
9209
10.9k
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9210
10.9k
9211
10.9k
  if (!ISD::isNON_EXTLoad(LN0) || 
!ISD::isUNINDEXEDLoad(LN0)10.6k
||
9212
10.9k
      
!N0.hasOneUse()10.6k
||
LN0->isVolatile()7.57k
||
!DstVT.isVector()7.50k
||
9213
10.9k
      
!DstVT.isPow2VectorType()7.25k
||
!TLI.isVectorLoadExtDesirable(SDValue(N, 0))7.08k
)
9214
9.93k
    return SDValue();
9215
1.05k
9216
1.05k
  SmallVector<SDNode *, 4> SetCCs;
9217
1.05k
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9218
0
    return SDValue();
9219
1.05k
9220
1.05k
  ISD::LoadExtType ExtType =
9221
1.05k
      N->getOpcode() == ISD::SIGN_EXTEND ? 
ISD::SEXTLOAD532
:
ISD::ZEXTLOAD519
;
9222
1.05k
9223
1.05k
  // Try to split the vector types to get down to legal types.
9224
1.05k
  EVT SplitSrcVT = SrcVT;
9225
1.05k
  EVT SplitDstVT = DstVT;
9226
2.71k
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9227
2.71k
         
SplitSrcVT.getVectorNumElements() > 12.45k
) {
9228
1.66k
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9229
1.66k
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9230
1.66k
  }
9231
1.05k
9232
1.05k
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9233
785
    return SDValue();
9234
266
9235
266
  SDLoc DL(N);
9236
266
  const unsigned NumSplits =
9237
266
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9238
266
  const unsigned Stride = SplitSrcVT.getStoreSize();
9239
266
  SmallVector<SDValue, 4> Loads;
9240
266
  SmallVector<SDValue, 4> Chains;
9241
266
9242
266
  SDValue BasePtr = LN0->getBasePtr();
9243
906
  for (unsigned Idx = 0; Idx < NumSplits; 
Idx++640
) {
9244
640
    const unsigned Offset = Idx * Stride;
9245
640
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9246
640
9247
640
    SDValue SplitLoad = DAG.getExtLoad(
9248
640
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9249
640
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9250
640
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9251
640
9252
640
    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9253
640
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));
9254
640
9255
640
    Loads.push_back(SplitLoad.getValue(0));
9256
640
    Chains.push_back(SplitLoad.getValue(1));
9257
640
  }
9258
266
9259
266
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9260
266
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9261
266
9262
266
  // Simplify TF.
9263
266
  AddToWorklist(NewChain.getNode());
9264
266
9265
266
  CombineTo(N, NewValue);
9266
266
9267
266
  // Replace uses of the original load (before extension)
9268
266
  // with a truncate of the concatenated sextloaded vectors.
9269
266
  SDValue Trunc =
9270
266
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9271
266
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9272
266
  CombineTo(N0.getNode(), Trunc, NewChain);
9273
266
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9274
266
}
9275
9276
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9277
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9278
137k
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9279
137k
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
9280
137k
  EVT VT = N->getValueType(0);
9281
137k
  EVT OrigVT = N->getOperand(0).getValueType();
9282
137k
  if (TLI.isZExtFree(OrigVT, VT))
9283
60.3k
    return SDValue();
9284
76.9k
9285
76.9k
  // and/or/xor
9286
76.9k
  SDValue N0 = N->getOperand(0);
9287
76.9k
  if (!(N0.getOpcode() == ISD::AND || 
N0.getOpcode() == ISD::OR62.0k
||
9288
76.9k
        
N0.getOpcode() == ISD::XOR60.7k
) ||
9289
76.9k
      
N0.getOperand(1).getOpcode() != ISD::Constant18.2k
||
9290
76.9k
      
(2.32k
LegalOperations2.32k
&&
!TLI.isOperationLegal(N0.getOpcode(), VT)622
))
9291
74.6k
    return SDValue();
9292
2.32k
9293
2.32k
  // shl/shr
9294
2.32k
  SDValue N1 = N0->getOperand(0);
9295
2.32k
  if (!(N1.getOpcode() == ISD::SHL || 
N1.getOpcode() == ISD::SRL2.27k
) ||
9296
2.32k
      
N1.getOperand(1).getOpcode() != ISD::Constant130
||
9297
2.32k
      
(130
LegalOperations130
&&
!TLI.isOperationLegal(N1.getOpcode(), VT)4
))
9298
2.19k
    return SDValue();
9299
130
9300
130
  // load
9301
130
  if (!isa<LoadSDNode>(N1.getOperand(0)))
9302
67
    return SDValue();
9303
63
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9304
63
  EVT MemVT = Load->getMemoryVT();
9305
63
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9306
63
      
Load->getExtensionType() == ISD::SEXTLOAD57
||
Load->isIndexed()56
)
9307
7
    return SDValue();
9308
56
9309
56
9310
56
  // If the shift op is SHL, the logic op must be AND, otherwise the result
9311
56
  // will be wrong.
9312
56
  if (N1.getOpcode() == ISD::SHL && 
N0.getOpcode() != ISD::AND3
)
9313
2
    return SDValue();
9314
54
9315
54
  if (!N0.hasOneUse() || !N1.hasOneUse())
9316
0
    return SDValue();
9317
54
9318
54
  SmallVector<SDNode*, 4> SetCCs;
9319
54
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9320
54
                               ISD::ZERO_EXTEND, SetCCs, TLI))
9321
0
    return SDValue();
9322
54
9323
54
  // Actually do the transformation.
9324
54
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9325
54
                                   Load->getChain(), Load->getBasePtr(),
9326
54
                                   Load->getMemoryVT(), Load->getMemOperand());
9327
54
9328
54
  SDLoc DL1(N1);
9329
54
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9330
54
                              N1.getOperand(1));
9331
54
9332
54
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9333
54
  Mask = Mask.zext(VT.getSizeInBits());
9334
54
  SDLoc DL0(N0);
9335
54
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9336
54
                            DAG.getConstant(Mask, DL0, VT));
9337
54
9338
54
  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9339
54
  CombineTo(N, And);
9340
54
  if (SDValue(Load, 0).hasOneUse()) {
9341
10
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9342
44
  } else {
9343
44
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9344
44
                                Load->getValueType(0), ExtLoad);
9345
44
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
9346
44
  }
9347
54
9348
54
  // N0 is dead at this point.
9349
54
  recursivelyDeleteUnusedNodes(N0.getNode());
9350
54
9351
54
  return SDValue(N,0); // Return N so it doesn't get rechecked!
9352
54
}
9353
9354
/// If we're narrowing or widening the result of a vector select and the final
9355
/// size is the same size as a setcc (compare) feeding the select, then try to
9356
/// apply the cast operation to the select's operands because matching vector
9357
/// sizes for a select condition and other operands should be more efficient.
9358
656k
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9359
656k
  unsigned CastOpcode = Cast->getOpcode();
9360
656k
  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9361
656k
          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9362
656k
          CastOpcode == ISD::FP_ROUND) &&
9363
656k
         "Unexpected opcode for vector select narrowing/widening");
9364
656k
9365
656k
  // We only do this transform before legal ops because the pattern may be
9366
656k
  // obfuscated by target-specific operations after legalization. Do not create
9367
656k
  // an illegal select op, however, because that may be difficult to lower.
9368
656k
  EVT VT = Cast->getValueType(0);
9369
656k
  if (LegalOperations || 
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)350k
)
9370
410k
    return SDValue();
9371
245k
9372
245k
  SDValue VSel = Cast->getOperand(0);
9373
245k
  if (VSel.getOpcode() != ISD::VSELECT || 
!VSel.hasOneUse()44
||
9374
245k
      
VSel.getOperand(0).getOpcode() != ISD::SETCC44
)
9375
245k
    return SDValue();
9376
44
9377
44
  // Does the setcc have the same vector size as the casted select?
9378
44
  SDValue SetCC = VSel.getOperand(0);
9379
44
  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9380
44
  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9381
28
    return SDValue();
9382
16
9383
16
  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9384
16
  SDValue A = VSel.getOperand(1);
9385
16
  SDValue B = VSel.getOperand(2);
9386
16
  SDValue CastA, CastB;
9387
16
  SDLoc DL(Cast);
9388
16
  if (CastOpcode == ISD::FP_ROUND) {
9389
4
    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9390
4
    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9391
4
    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9392
12
  } else {
9393
12
    CastA = DAG.getNode(CastOpcode, DL, VT, A);
9394
12
    CastB = DAG.getNode(CastOpcode, DL, VT, B);
9395
12
  }
9396
16
  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9397
16
}
9398
9399
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9400
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9401
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9402
                                     const TargetLowering &TLI, EVT VT,
9403
                                     bool LegalOperations, SDNode *N,
9404
246k
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
9405
246k
  SDNode *N0Node = N0.getNode();
9406
246k
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? 
ISD::isSEXTLoad(N0Node)109k
9407
246k
                                                   : 
ISD::isZEXTLoad(N0Node)137k
;
9408
246k
  if ((!isAExtLoad && 
!ISD::isEXTLoad(N0Node)246k
) ||
9409
246k
      
!ISD::isUNINDEXEDLoad(N0Node)304
||
!N0.hasOneUse()304
)
9410
246k
    return SDValue();
9411
110
9412
110
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9413
110
  EVT MemVT = LN0->getMemoryVT();
9414
110
  if ((LegalOperations || 
LN0->isVolatile()17
||
VT.isVector()17
) &&
9415
110
      
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)99
)
9416
80
    return SDValue();
9417
30
9418
30
  SDValue ExtLoad =
9419
30
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9420
30
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9421
30
  Combiner.CombineTo(N, ExtLoad);
9422
30
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9423
30
  if (LN0->use_empty())
9424
30
    Combiner.recursivelyDeleteUnusedNodes(LN0);
9425
30
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9426
30
}
9427
9428
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9429
// Only generate vector extloads when 1) they're legal, and 2) they are
9430
// deemed desirable by the target.
9431
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9432
                                  const TargetLowering &TLI, EVT VT,
9433
                                  bool LegalOperations, SDNode *N, SDValue N0,
9434
                                  ISD::LoadExtType ExtLoadType,
9435
321k
                                  ISD::NodeType ExtOpc) {
9436
321k
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9437
321k
      
!ISD::isUNINDEXEDLoad(N0.getNode())85.1k
||
9438
321k
      
(85.1k
(85.1k
LegalOperations85.1k
||
VT.isVector()82.1k
||
9439
85.1k
        
cast<LoadSDNode>(N0)->isVolatile()75.1k
) &&
9440
85.1k
       
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())10.4k
))
9441
242k
    return {};
9442
78.8k
9443
78.8k
  bool DoXform = true;
9444
78.8k
  SmallVector<SDNode *, 4> SetCCs;
9445
78.8k
  if (!N0.hasOneUse())
9446
22.1k
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9447
78.8k
  if (VT.isVector())
9448
2.73k
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9449
78.8k
  if (!DoXform)
9450
4.37k
    return {};
9451
74.4k
9452
74.4k
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9453
74.4k
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9454
74.4k
                                   LN0->getBasePtr(), N0.getValueType(),
9455
74.4k
                                   LN0->getMemOperand());
9456
74.4k
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9457
74.4k
  // If the load value is used only by N, replace it via CombineTo N.
9458
74.4k
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9459
74.4k
  Combiner.CombineTo(N, ExtLoad);
9460
74.4k
  if (NoReplaceTrunc) {
9461
63.8k
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9462
63.8k
    Combiner.recursivelyDeleteUnusedNodes(LN0);
9463
63.8k
  } else {
9464
10.6k
    SDValue Trunc =
9465
10.6k
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9466
10.6k
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9467
10.6k
  }
9468
74.4k
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9469
74.4k
}
9470
9471
// Fold an extended sign-bit test into a not+shift:
//   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
//   zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
                                       bool LegalOperations) {
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");

  // Match a single-use i1 setcc feeding the extension; only before
  // legalization.
  SDValue SetCC = N->getOperand(0);
  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
    return SDValue();

  SDValue X = SetCC.getOperand(0);
  SDValue RHS = SetCC.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT XVT = X.getValueType();
  // setge X, C is canonicalized to setgt, so we do not need to match that
  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
  // not require the 'not' op.
  if (CC != ISD::SETGT || !isAllOnesConstant(RHS) || VT != XVT)
    return SDValue();

  // Invert and smear/shift the sign bit:
  // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
  // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
  SDLoc DL(N);
  SDValue NotX = DAG.getNOT(DL, X, VT);
  SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  unsigned ShiftOpcode =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
  return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
}
9501
9502
149k
/// Combine patterns rooted at a SIGN_EXTEND node.
///
/// Tries a sequence of folds in a fixed order: constant folding, collapsing
/// nested extends, narrowing through truncates, merging with loads into
/// sextloads, setcc/select rewrites, sext->zext when the sign bit is known
/// zero, and sub/add rewrites in the wider type. Returns the replacement
/// value on success, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Widen or narrow the source to the destination type first, then sign
      // extend in register from the truncated width.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the constant so the logic op sees the same bits it
        // would after extending the original narrow result.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  return SDValue();
}
9729
9730
// isTruncateOf - If N is a truncate of some other value, return true, record
9731
// the value being truncated in Op and which of Op's bits are zero/one in Known.
9732
// This function computes KnownBits to avoid a duplicated call to
9733
// computeKnownBits in the caller.
9734
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9735
205k
                         KnownBits &Known) {
9736
205k
  if (N->getOpcode() == ISD::TRUNCATE) {
9737
17.7k
    Op = N->getOperand(0);
9738
17.7k
    Known = DAG.computeKnownBits(Op);
9739
17.7k
    return true;
9740
17.7k
  }
9741
187k
9742
187k
  if (N.getOpcode() != ISD::SETCC ||
9743
187k
      
N.getValueType().getScalarType() != MVT::i124.0k
||
9744
187k
      
cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE18.9k
)
9745
178k
    return false;
9746
9.57k
9747
9.57k
  SDValue Op0 = N->getOperand(0);
9748
9.57k
  SDValue Op1 = N->getOperand(1);
9749
9.57k
  assert(Op0.getValueType() == Op1.getValueType());
9750
9.57k
9751
9.57k
  if (isNullOrNullSplat(Op0))
9752
0
    Op = Op1;
9753
9.57k
  else if (isNullOrNullSplat(Op1))
9754
8.67k
    Op = Op0;
9755
898
  else
9756
898
    return false;
9757
8.67k
9758
8.67k
  Known = DAG.computeKnownBits(Op);
9759
8.67k
9760
8.67k
  return (Known.Zero | 1).isAllOnesValue();
9761
8.67k
}
9762
9763
206k
/// Combine patterns rooted at a ZERO_EXTEND node.
///
/// Tries a sequence of folds in a fixed order: constant folding, collapsing
/// nested extends, zext-of-truncate rewrites (including the and-with-mask
/// form), merging with loads into zextloads, setcc rewrites, and hoisting
/// the zext above constant shifts. Returns the replacement value on success,
/// or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // The bits discarded by the truncate are those between the narrow width
    // and min(source width, destination width).
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the constant mask to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          // If (and (load x) cst) would already match as a zextload, keep it
          // that way instead of widening here.
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10010
10011
88.0k
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10012
88.0k
  SDValue N0 = N->getOperand(0);
10013
88.0k
  EVT VT = N->getValueType(0);
10014
88.0k
10015
88.0k
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10016
249
    return Res;
10017
87.8k
10018
87.8k
  // fold (aext (aext x)) -> (aext x)
10019
87.8k
  // fold (aext (zext x)) -> (zext x)
10020
87.8k
  // fold (aext (sext x)) -> (sext x)
10021
87.8k
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
10022
87.8k
      
N0.getOpcode() == ISD::ZERO_EXTEND87.7k
||
10023
87.8k
      
N0.getOpcode() == ISD::SIGN_EXTEND87.7k
)
10024
30
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10025
87.7k
10026
87.7k
  // fold (aext (truncate (load x))) -> (aext (smaller load x))
10027
87.7k
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10028
87.7k
  if (N0.getOpcode() == ISD::TRUNCATE) {
10029
6.24k
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10030
14
      SDNode *oye = N0.getOperand(0).getNode();
10031
14
      if (NarrowLoad.getNode() != N0.getNode()) {
10032
14
        CombineTo(N0.getNode(), NarrowLoad);
10033
14
        // CombineTo deleted the truncate, if needed, but not what's under it.
10034
14
        AddToWorklist(oye);
10035
14
      }
10036
14
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10037
14
    }
10038
87.7k
  }
10039
87.7k
10040
87.7k
  // fold (aext (truncate x))
10041
87.7k
  if (N0.getOpcode() == ISD::TRUNCATE)
10042
6.23k
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10043
81.5k
10044
81.5k
  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10045
81.5k
  // if the trunc is not free.
10046
81.5k
  if (N0.getOpcode() == ISD::AND &&
10047
81.5k
      
N0.getOperand(0).getOpcode() == ISD::TRUNCATE8.74k
&&
10048
81.5k
      
N0.getOperand(1).getOpcode() == ISD::Constant2.78k
&&
10049
81.5k
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10050
2.56k
                          N0.getValueType())) {
10051
49
    SDLoc DL(N);
10052
49
    SDValue X = N0.getOperand(0).getOperand(0);
10053
49
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
10054
49
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
10055
49
    Mask = Mask.zext(VT.getSizeInBits());
10056
49
    return DAG.getNode(ISD::AND, DL, VT,
10057
49
                       X, DAG.getConstant(Mask, DL, VT));
10058
49
  }
10059
81.4k
10060
81.4k
  // fold (aext (load x)) -> (aext (truncate (extload x)))
10061
81.4k
  // None of the supported targets knows how to perform load and any_ext
10062
81.4k
  // on vectors in one instruction.  We only perform this transformation on
10063
81.4k
  // scalars.
10064
81.4k
  if (ISD::isNON_EXTLoad(N0.getNode()) && 
!VT.isVector()6.96k
&&
10065
81.4k
      
ISD::isUNINDEXEDLoad(N0.getNode())6.86k
&&
10066
81.4k
      
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())6.86k
) {
10067
6.51k
    bool DoXform = true;
10068
6.51k
    SmallVector<SDNode*, 4> SetCCs;
10069
6.51k
    if (!N0.hasOneUse())
10070
314
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10071
314
                                        TLI);
10072
6.51k
    if (DoXform) {
10073
6.35k
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10074
6.35k
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10075
6.35k
                                       LN0->getChain(),
10076
6.35k
                                       LN0->getBasePtr(), N0.getValueType(),
10077
6.35k
                                       LN0->getMemOperand());
10078
6.35k
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10079
6.35k
      // If the load value is used only by N, replace it via CombineTo N.
10080
6.35k
      bool NoReplaceTrunc = N0.hasOneUse();
10081
6.35k
      CombineTo(N, ExtLoad);
10082
6.35k
      if (NoReplaceTrunc) {
10083
6.20k
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10084
6.20k
        recursivelyDeleteUnusedNodes(LN0);
10085
6.20k
      } else {
10086
151
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10087
151
                                    N0.getValueType(), ExtLoad);
10088
151
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10089
151
      }
10090
6.35k
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
10091
6.35k
    }
10092
75.1k
  }
10093
75.1k
10094
75.1k
  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10095
75.1k
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10096
75.1k
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
10097
75.1k
  if (N0.getOpcode() == ISD::LOAD && 
!ISD::isNON_EXTLoad(N0.getNode())1.35k
&&
10098
75.1k
      
ISD::isUNINDEXEDLoad(N0.getNode())746
&&
N0.hasOneUse()746
) {
10099
323
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10100
323
    ISD::LoadExtType ExtType = LN0->getExtensionType();
10101
323
    EVT MemVT = LN0->getMemoryVT();
10102
323
    if (!LegalOperations || 
TLI.isLoadExtLegal(ExtType, VT, MemVT)120
) {
10103
274
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10104
274
                                       VT, LN0->getChain(), LN0->getBasePtr(),
10105
274
                                       MemVT, LN0->getMemOperand());
10106
274
      CombineTo(N, ExtLoad);
10107
274
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10108
274
      recursivelyDeleteUnusedNodes(LN0);
10109
274
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10110
274
    }
10111
74.8k
  }
10112
74.8k
10113
74.8k
  if (N0.getOpcode() == ISD::SETCC) {
10114
9.00k
    // For vectors:
10115
9.00k
    // aext(setcc) -> vsetcc
10116
9.00k
    // aext(setcc) -> truncate(vsetcc)
10117
9.00k
    // aext(setcc) -> aext(vsetcc)
10118
9.00k
    // Only do this before legalize for now.
10119
9.00k
    if (VT.isVector() && 
!LegalOperations740
) {
10120
701
      EVT N00VT = N0.getOperand(0).getValueType();
10121
701
      if (getSetCCResultType(N00VT) == N0.getValueType())
10122
100
        return SDValue();
10123
601
10124
601
      // We know that the # elements of the results is the same as the
10125
601
      // # elements of the compare (and the # elements of the compare result
10126
601
      // for that matter).  Check to see that they are the same size.  If so,
10127
601
      // we know that the element size of the sext'd result matches the
10128
601
      // element size of the compare operands.
10129
601
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
10130
446
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10131
446
                             N0.getOperand(1),
10132
446
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
10133
155
10134
155
      // If the desired elements are smaller or larger than the source
10135
155
      // elements we can use a matching integer vector type and then
10136
155
      // truncate/any extend
10137
155
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10138
155
      SDValue VsetCC =
10139
155
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10140
155
                      N0.getOperand(1),
10141
155
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
10142
155
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10143
155
    }
10144
8.29k
10145
8.29k
    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10146
8.29k
    SDLoc DL(N);
10147
8.29k
    if (SDValue SCC = SimplifySelectCC(
10148
70
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10149
70
            DAG.getConstant(0, DL, VT),
10150
70
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10151
70
      return SCC;
10152
74.0k
  }
10153
74.0k
10154
74.0k
  return SDValue();
10155
74.0k
}
10156
10157
528k
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10158
528k
  unsigned Opcode = N->getOpcode();
10159
528k
  SDValue N0 = N->getOperand(0);
10160
528k
  SDValue N1 = N->getOperand(1);
10161
528k
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10162
528k
10163
528k
  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10164
528k
  if (N0.getOpcode() == Opcode &&
10165
528k
      
AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()93
)
10166
0
    return N0;
10167
528k
10168
528k
  if (N0.getOpcode() == ISD::TRUNCATE && 
N0.hasOneUse()2.89k
&&
10169
528k
      
N0.getOperand(0).getOpcode() == Opcode2.89k
) {
10170
2.83k
    // We have an assert, truncate, assert sandwich. Make one stronger assert
10171
2.83k
    // by asserting on the smallest asserted type to the larger source type.
10172
2.83k
    // This eliminates the later assert:
10173
2.83k
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10174
2.83k
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10175
2.83k
    SDValue BigA = N0.getOperand(0);
10176
2.83k
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10177
2.83k
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10178
2.83k
           "Asserting zero/sign-extended bits to a type larger than the "
10179
2.83k
           "truncated destination does not provide information");
10180
2.83k
10181
2.83k
    SDLoc DL(N);
10182
2.83k
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : 
BigA_AssertVT0
;
10183
2.83k
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10184
2.83k
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10185
2.83k
                                    BigA.getOperand(0), MinAssertVTVal);
10186
2.83k
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10187
2.83k
  }
10188
525k
10189
525k
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10190
525k
  // than X. Just move the AssertZext in front of the truncate and drop the
10191
525k
  // AssertSExt.
10192
525k
  if (N0.getOpcode() == ISD::TRUNCATE && 
N0.hasOneUse()55
&&
10193
525k
      
N0.getOperand(0).getOpcode() == ISD::AssertSext55
&&
10194
525k
      
Opcode == ISD::AssertZext7
) {
10195
7
    SDValue BigA = N0.getOperand(0);
10196
7
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10197
7
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10198
7
           "Asserting zero/sign-extended bits to a type larger than the "
10199
7
           "truncated destination does not provide information");
10200
7
10201
7
    if (AssertVT.bitsLT(BigA_AssertVT)) {
10202
7
      SDLoc DL(N);
10203
7
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10204
7
                                      BigA.getOperand(0), N1);
10205
7
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10206
7
    }
10207
525k
  }
10208
525k
10209
525k
  return SDValue();
10210
525k
}
10211
10212
/// If the result of a wider load is shifted to right of N  bits and then
10213
/// truncated to a narrower type and where N is a multiple of number of bits of
10214
/// the narrower type, transform it to a narrower load from address + N / num of
10215
/// bits of new type. Also narrow the load if the result is masked with an AND
10216
/// to effectively produce a smaller type. If the result is to be extended, also
10217
/// fold the extension to form a extending load.
10218
786k
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10219
786k
  unsigned Opc = N->getOpcode();
10220
786k
10221
786k
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10222
786k
  SDValue N0 = N->getOperand(0);
10223
786k
  EVT VT = N->getValueType(0);
10224
786k
  EVT ExtVT = VT;
10225
786k
10226
786k
  // This transformation isn't valid for vector loads.
10227
786k
  if (VT.isVector())
10228
77.1k
    return SDValue();
10229
709k
10230
709k
  unsigned ShAmt = 0;
10231
709k
  bool HasShiftedOffset = false;
10232
709k
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10233
709k
  // extended to VT.
10234
709k
  if (Opc == ISD::SIGN_EXTEND_INREG) {
10235
53.9k
    ExtType = ISD::SEXTLOAD;
10236
53.9k
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10237
655k
  } else if (Opc == ISD::SRL) {
10238
237k
    // Another special-case: SRL is basically zero-extending a narrower value,
10239
237k
    // or it maybe shifting a higher subword, half or byte into the lowest
10240
237k
    // bits.
10241
237k
    ExtType = ISD::ZEXTLOAD;
10242
237k
    N0 = SDValue(N, 0);
10243
237k
10244
237k
    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10245
237k
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10246
237k
    if (!N01 || 
!LN0213k
)
10247
211k
      return SDValue();
10248
25.7k
10249
25.7k
    uint64_t ShiftAmt = N01->getZExtValue();
10250
25.7k
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10251
25.7k
    if (LN0->getExtensionType() != ISD::SEXTLOAD && 
MemoryWidth > ShiftAmt25.6k
)
10252
25.6k
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10253
133
    else
10254
133
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10255
133
                                VT.getSizeInBits() - ShiftAmt);
10256
418k
  } else if (Opc == ISD::AND) {
10257
52.5k
    // An AND with a constant mask is the same as a truncate + zero-extend.
10258
52.5k
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10259
52.5k
    if (!AndC)
10260
0
      return SDValue();
10261
52.5k
10262
52.5k
    const APInt &Mask = AndC->getAPIntValue();
10263
52.5k
    unsigned ActiveBits = 0;
10264
52.5k
    if (Mask.isMask()) {
10265
38.9k
      ActiveBits = Mask.countTrailingOnes();
10266
38.9k
    } else 
if (13.6k
Mask.isShiftedMask()13.6k
) {
10267
11.4k
      ShAmt = Mask.countTrailingZeros();
10268
11.4k
      APInt ShiftedMask = Mask.lshr(ShAmt);
10269
11.4k
      ActiveBits = ShiftedMask.countTrailingOnes();
10270
11.4k
      HasShiftedOffset = true;
10271
11.4k
    } else
10272
2.21k
      return SDValue();
10273
50.3k
10274
50.3k
    ExtType = ISD::ZEXTLOAD;
10275
50.3k
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10276
50.3k
  }
10277
709k
10278
709k
  
if (495k
N0.getOpcode() == ISD::SRL495k
&&
N0.hasOneUse()91.6k
) {
10279
81.7k
    SDValue SRL = N0;
10280
81.7k
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10281
79.6k
      ShAmt = ConstShift->getZExtValue();
10282
79.6k
      unsigned EVTBits = ExtVT.getSizeInBits();
10283
79.6k
      // Is the shift amount a multiple of size of VT?
10284
79.6k
      if ((ShAmt & (EVTBits-1)) == 0) {
10285
61.0k
        N0 = N0.getOperand(0);
10286
61.0k
        // Is the load width a multiple of size of VT?
10287
61.0k
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10288
60
          return SDValue();
10289
79.6k
      }
10290
79.6k
10291
79.6k
      // At this point, we must have a load or else we can't do the transform.
10292
79.6k
      if (!isa<LoadSDNode>(N0)) 
return SDValue()54.0k
;
10293
25.5k
10294
25.5k
      auto *LN0 = cast<LoadSDNode>(N0);
10295
25.5k
10296
25.5k
      // Because a SRL must be assumed to *need* to zero-extend the high bits
10297
25.5k
      // (as opposed to anyext the high bits), we can't combine the zextload
10298
25.5k
      // lowering of SRL and an sextload.
10299
25.5k
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
10300
152
        return SDValue();
10301
25.4k
10302
25.4k
      // If the shift amount is larger than the input type then we're not
10303
25.4k
      // accessing any of the loaded bytes.  If the load was a zextload/extload
10304
25.4k
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
10305
25.4k
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10306
4
        return SDValue();
10307
25.4k
10308
25.4k
      // If the SRL is only used by a masking AND, we may be able to adjust
10309
25.4k
      // the ExtVT to make the AND redundant.
10310
25.4k
      SDNode *Mask = *(SRL->use_begin());
10311
25.4k
      if (Mask->getOpcode() == ISD::AND &&
10312
25.4k
          
isa<ConstantSDNode>(Mask->getOperand(1))9.02k
) {
10313
9.01k
        const APInt &ShiftMask =
10314
9.01k
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10315
9.01k
        if (ShiftMask.isMask()) {
10316
7.69k
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10317
7.69k
                                           ShiftMask.countTrailingOnes());
10318
7.69k
          // If the mask is smaller, recompute the type.
10319
7.69k
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10320
7.69k
              
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)6.72k
)
10321
945
            ExtVT = MaskedVT;
10322
7.69k
        }
10323
9.01k
      }
10324
25.4k
    }
10325
81.7k
  }
10326
495k
10327
495k
  // If the load is shifted left (and the result isn't shifted back right),
10328
495k
  // we can fold the truncate through the shift.
10329
495k
  unsigned ShLeftAmt = 0;
10330
441k
  if (ShAmt == 0 && 
N0.getOpcode() == ISD::SHL404k
&&
N0.hasOneUse()1.38k
&&
10331
441k
      
ExtVT == VT1.10k
&&
TLI.isNarrowingProfitable(N0.getValueType(), VT)857
) {
10332
348
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10333
40
      ShLeftAmt = N01->getZExtValue();
10334
40
      N0 = N0.getOperand(0);
10335
40
    }
10336
348
  }
10337
441k
10338
441k
  // If we haven't found a load, we can't narrow it.
10339
441k
  if (!isa<LoadSDNode>(N0))
10340
319k
    return SDValue();
10341
121k
10342
121k
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10343
121k
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10344
114k
    return SDValue();
10345
7.93k
10346
7.93k
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10347
325
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10348
325
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10349
325
    return LVTStoreBits - EVTStoreBits - ShAmt;
10350
325
  };
10351
7.93k
10352
7.93k
  // For big endian targets, we need to adjust the offset to the pointer to
10353
7.93k
  // load the correct bytes.
10354
7.93k
  if (DAG.getDataLayout().isBigEndian())
10355
308
    ShAmt = AdjustBigEndianShift(ShAmt);
10356
7.93k
10357
7.93k
  EVT PtrType = N0.getOperand(1).getValueType();
10358
7.93k
  uint64_t PtrOff = ShAmt / 8;
10359
7.93k
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10360
7.93k
  SDLoc DL(LN0);
10361
7.93k
  // The original load itself didn't wrap, so an offset within it doesn't.
10362
7.93k
  SDNodeFlags Flags;
10363
7.93k
  Flags.setNoUnsignedWrap(true);
10364
7.93k
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10365
7.93k
                               PtrType, LN0->getBasePtr(),
10366
7.93k
                               DAG.getConstant(PtrOff, DL, PtrType),
10367
7.93k
                               Flags);
10368
7.93k
  AddToWorklist(NewPtr.getNode());
10369
7.93k
10370
7.93k
  SDValue Load;
10371
7.93k
  if (ExtType == ISD::NON_EXTLOAD)
10372
3.25k
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
10373
3.25k
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10374
3.25k
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10375
4.67k
  else
10376
4.67k
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
10377
4.67k
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10378
4.67k
                          NewAlign, LN0->getMemOperand()->getFlags(),
10379
4.67k
                          LN0->getAAInfo());
10380
7.93k
10381
7.93k
  // Replace the old load's chain with the new load's chain.
10382
7.93k
  WorklistRemover DeadNodes(*this);
10383
7.93k
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10384
7.93k
10385
7.93k
  // Shift the result left, if we've swallowed a left shift.
10386
7.93k
  SDValue Result = Load;
10387
7.93k
  if (ShLeftAmt != 0) {
10388
16
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10389
16
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10390
0
      ShImmTy = VT;
10391
16
    // If the shift amount is as large as the result size (but, presumably,
10392
16
    // no larger than the source) then the useful bits of the result are
10393
16
    // zero; we can't simply return the shortened shift, because the result
10394
16
    // of that operation is undefined.
10395
16
    SDLoc DL(N0);
10396
16
    if (ShLeftAmt >= VT.getSizeInBits())
10397
7
      Result = DAG.getConstant(0, DL, VT);
10398
9
    else
10399
9
      Result = DAG.getNode(ISD::SHL, DL, VT,
10400
9
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10401
16
  }
10402
7.93k
10403
7.93k
  if (HasShiftedOffset) {
10404
99
    // Recalculate the shift amount after it has been altered to calculate
10405
99
    // the offset.
10406
99
    if (DAG.getDataLayout().isBigEndian())
10407
17
      ShAmt = AdjustBigEndianShift(ShAmt);
10408
99
10409
99
    // We're using a shifted mask, so the load now has an offset. This means
10410
99
    // that data has been loaded into the lower bytes than it would have been
10411
99
    // before, so we need to shl the loaded data into the correct position in the
10412
99
    // register.
10413
99
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10414
99
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10415
99
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10416
99
  }
10417
7.93k
10418
7.93k
  // Return the new loaded value.
10419
7.93k
  return Result;
10420
7.93k
}
10421
10422
68.8k
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10423
68.8k
  SDValue N0 = N->getOperand(0);
10424
68.8k
  SDValue N1 = N->getOperand(1);
10425
68.8k
  EVT VT = N->getValueType(0);
10426
68.8k
  EVT EVT = cast<VTSDNode>(N1)->getVT();
10427
68.8k
  unsigned VTBits = VT.getScalarSizeInBits();
10428
68.8k
  unsigned EVTBits = EVT.getScalarSizeInBits();
10429
68.8k
10430
68.8k
  if (N0.isUndef())
10431
0
    return DAG.getUNDEF(VT);
10432
68.8k
10433
68.8k
  // fold (sext_in_reg c1) -> c1
10434
68.8k
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10435
12
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10436
68.8k
10437
68.8k
  // If the input is already sign extended, just drop the extension.
10438
68.8k
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10439
2.97k
    return N0;
10440
65.8k
10441
65.8k
  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10442
65.8k
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10443
65.8k
      
EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())4
)
10444
4
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10445
4
                       N0.getOperand(0), N1);
10446
65.8k
10447
65.8k
  // fold (sext_in_reg (sext x)) -> (sext x)
10448
65.8k
  // fold (sext_in_reg (aext x)) -> (sext x)
10449
65.8k
  // if x is small enough or if we know that x has more than 1 sign bit and the
10450
65.8k
  // sign_extend_inreg is extending from one of them.
10451
65.8k
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10452
10.3k
    SDValue N00 = N0.getOperand(0);
10453
10.3k
    unsigned N00Bits = N00.getScalarValueSizeInBits();
10454
10.3k
    if ((N00Bits <= EVTBits ||
10455
10.3k
         
(N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits10.3k
) &&
10456
10.3k
        
(317
!LegalOperations317
||
TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)26
))
10457
315
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10458
65.5k
  }
10459
65.5k
10460
65.5k
  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10461
65.5k
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10462
65.5k
       
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG65.3k
||
10463
65.5k
       
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG65.3k
) &&
10464
65.5k
      
N0.getOperand(0).getScalarValueSizeInBits() == EVTBits154
) {
10465
83
    if (!LegalOperations ||
10466
83
        
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)0
)
10467
83
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10468
83
                         N0.getOperand(0));
10469
65.4k
  }
10470
65.4k
10471
65.4k
  // fold (sext_in_reg (zext x)) -> (sext x)
10472
65.4k
  // iff we are extending the source sign bit.
10473
65.4k
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10474
28
    SDValue N00 = N0.getOperand(0);
10475
28
    if (N00.getScalarValueSizeInBits() == EVTBits &&
10476
28
        
(4
!LegalOperations4
||
TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)0
))
10477
4
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10478
65.4k
  }
10479
65.4k
10480
65.4k
  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10481
65.4k
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10482
384
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10483
65.0k
10484
65.0k
  // fold operands of sext_in_reg based on knowledge that the top bits are not
10485
65.0k
  // demanded.
10486
65.0k
  if (SimplifyDemandedBits(SDValue(N, 0)))
10487
2.52k
    return SDValue(N, 0);
10488
62.5k
10489
62.5k
  // fold (sext_in_reg (load x)) -> (smaller sextload x)
10490
62.5k
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10491
62.5k
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
10492
4.13k
    return NarrowLoad;
10493
58.3k
10494
58.3k
  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10495
58.3k
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10496
58.3k
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10497
58.3k
  if (N0.getOpcode() == ISD::SRL) {
10498
11.8k
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10499
10.3k
      if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10500
10.3k
        // We can turn this into an SRA iff the input to the SRL is already sign
10501
10.3k
        // extended enough.
10502
10.3k
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10503
10.3k
        if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10504
2.45k
          return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10505
2.45k
                             N0.getOperand(1));
10506
55.9k
      }
10507
11.8k
  }
10508
55.9k
10509
55.9k
  // fold (sext_inreg (extload x)) -> (sextload x)
10510
55.9k
  // If sextload is not supported by target, we can only do the combine when
10511
55.9k
  // load has one use. Doing otherwise can block folding the extload with other
10512
55.9k
  // extends that the target does support.
10513
55.9k
  if (ISD::isEXTLoad(N0.getNode()) &&
10514
55.9k
      
ISD::isUNINDEXEDLoad(N0.getNode())2.62k
&&
10515
55.9k
      
EVT == cast<LoadSDNode>(N0)->getMemoryVT()2.62k
&&
10516
55.9k
      
(2.43k
(2.43k
!LegalOperations2.43k
&&
!cast<LoadSDNode>(N0)->isVolatile()1.50k
&&
10517
2.43k
        
N0.hasOneUse()1.46k
) ||
10518
2.43k
       
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT)1.38k
)) {
10519
1.35k
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10520
1.35k
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10521
1.35k
                                     LN0->getChain(),
10522
1.35k
                                     LN0->getBasePtr(), EVT,
10523
1.35k
                                     LN0->getMemOperand());
10524
1.35k
    CombineTo(N, ExtLoad);
10525
1.35k
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10526
1.35k
    AddToWorklist(ExtLoad.getNode());
10527
1.35k
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10528
1.35k
  }
10529
54.5k
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10530
54.5k
  if (ISD::isZEXTLoad(N0.getNode()) && 
ISD::isUNINDEXEDLoad(N0.getNode())248
&&
10531
54.5k
      
N0.hasOneUse()248
&&
10532
54.5k
      
EVT == cast<LoadSDNode>(N0)->getMemoryVT()0
&&
10533
54.5k
      
(0
(0
!LegalOperations0
&&
!cast<LoadSDNode>(N0)->isVolatile()0
) ||
10534
0
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10535
0
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10536
0
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10537
0
                                     LN0->getChain(),
10538
0
                                     LN0->getBasePtr(), EVT,
10539
0
                                     LN0->getMemOperand());
10540
0
    CombineTo(N, ExtLoad);
10541
0
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10542
0
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10543
0
  }
10544
54.5k
10545
54.5k
  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10546
54.5k
  if (EVTBits <= 16 && 
N0.getOpcode() == ISD::OR51.9k
) {
10547
144
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10548
8
                                           N0.getOperand(1), false))
10549
8
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10550
8
                         BSwap, N1);
10551
54.5k
  }
10552
54.5k
10553
54.5k
  return SDValue();
10554
54.5k
}
10555
10556
5.13k
SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10557
5.13k
  SDValue N0 = N->getOperand(0);
10558
5.13k
  EVT VT = N->getValueType(0);
10559
5.13k
10560
5.13k
  if (N0.isUndef())
10561
0
    return DAG.getUNDEF(VT);
10562
5.13k
10563
5.13k
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10564
20
    return Res;
10565
5.11k
10566
5.11k
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10567
36
    return SDValue(N, 0);
10568
5.07k
10569
5.07k
  return SDValue();
10570
5.07k
}
10571
10572
10.2k
SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10573
10.2k
  SDValue N0 = N->getOperand(0);
10574
10.2k
  EVT VT = N->getValueType(0);
10575
10.2k
10576
10.2k
  if (N0.isUndef())
10577
0
    return DAG.getUNDEF(VT);
10578
10.2k
10579
10.2k
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10580
22
    return Res;
10581
10.1k
10582
10.1k
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10583
129
    return SDValue(N, 0);
10584
10.0k
10585
10.0k
  return SDValue();
10586
10.0k
}
10587
10588
/// Combine a TRUNCATE node. Each guarded block below is an independent fold,
/// tried in order; the first match returns the replacement value. An empty
/// SDValue means no combine applied. The ordering of the folds is part of
/// the behavior — earlier folds take precedence.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (SrcVT == VT)
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    // getNode may return N itself (no fold happened); only return when a new
    // node was actually produced.
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the source vector with narrower elements of the truncated
    // type; total bit size must be preserved.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low (truncated) part of the wide element is the first narrow
      // element on little-endian, the last on big-endian.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe when the shift amount is provably < Size, i.e. its known
    // significant bits fit within log2(Size).
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th element; these correspond to the low
      // parts that survive the truncate.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Preserve the chain so other users of the original load's chain
        // result are updated to the new load.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      // Rebuild the concat with undefs everywhere except the truncated
      // single defined operand at position Idx.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    // NOTE: this local SrcVT intentionally shadows the outer SrcVT.
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The truncated (low) bits live in element 0 on little-endian targets
      // and in the last element on big-endian targets.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do for addcarry before legalize operation
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
10900
10901
154k
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10902
154k
  SDValue Elt = N->getOperand(i);
10903
154k
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
10904
153k
    return Elt.getNode();
10905
529
  return Elt.getOperand(Elt.getResNo()).getNode();
10906
529
}
10907
10908
/// build_pair (load, load) -> load
/// if load locations are consecutive.
///
/// \p N must be a BUILD_PAIR node; \p VT is the wide type for the merged
/// load. Returns the merged load, or an empty SDValue if the two halves are
/// not mergeable.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // After the possible swap, LD1 is the load at the lower address. Both
  // operands must be simple (non-extending), single-use loads in the same
  // address space.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must read the bytes immediately after LD1 (checked via
  // areNonVolatileConsecutiveLoads with a distance of LD1Bytes).
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only merge when the wide load would not require more alignment than
    // the narrow load already guarantees.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
10941
10942
10
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10943
10
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10944
10
  // and Lo parts; on big-endian machines it doesn't.
10945
10
  return DAG.getDataLayout().isBigEndian() ? 
16
:
04
;
10946
10
}
10947
10948
/// Try to turn an integer logic op sandwiched between two bitcasts of an FP
/// value into the equivalent FP sign-bit operation (fabs / fneg /
/// fneg(fabs)). Returns the folded node, or an empty SDValue.
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    return SDValue();

  // Map the integer logic opcode to the FP op it implements, along with the
  // exact constant mask that the fold requires on the RHS.
  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    // Clearing the sign bit is fabs.
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::XOR:
    // Flipping the sign bit is fneg.
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::OR:
    // Setting the sign bit is fneg(fabs) — FPOpcode holds the inner FABS;
    // the extra FNEG is added below.
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  //   fneg (fabs X)
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    NumFPLogicOpsConv++;
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    return FPOp;
  }

  return SDValue();
}
11000
11001
/// Combine a BITCAST node. Each guarded block below is an independent fold,
/// tried in order; the first match returns the replacement value, and an
/// empty SDValue means no combine applied.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Bitcast of undef is undef.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // maybe depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // getBitcast may return N itself; only report a fold if a new node
      // was produced.
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
                                    *LN0->getMemOperand())) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), LN0->getAlignment(),
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of the original load's chain to the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is two doubles; the sign bit lives in the Hi 64-bit half,
    // so build a 128-bit flip mask from a 64-bit sign mask pair.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 variant: compute the flip bit from the Hi halves of
      // (cst ^ x) and apply it to both halves via BUILD_PAIR.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // The new type has MaskScale narrow elements per original element, so
    // each mask entry expands into MaskScale consecutive entries.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      // Try the commuted form before giving up.
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
11253
11254
68.8k
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11255
68.8k
  EVT VT = N->getValueType(0);
11256
68.8k
  return CombineConsecutiveLoads(N, VT);
11257
68.8k
}
11258
11259
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Produces a new BUILD_VECTOR whose elements reinterpret the bits of BV as
/// DstEltVT values — i.e. a constant-folded bitcast of the whole vector.
/// The element count of the result is scaled so total bit width is preserved.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    // Same element count as the input; only the element type changes.
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    // Recurses through the SrcBitSize == DstBitSize path above, so this
    // terminates after one level.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      // Pack NumInputsPerOutput consecutive source elements into one output
      // element.  Bits accumulate high-to-low, so on little-endian targets
      // the group is walked in reverse to place the first source element in
      // the low bits of the result.
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // Only a group that is entirely undef yields an undef output element;
      // a partially-undef group treats the undef lanes as zero bits.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to NumOutputsPerInput undef outputs.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the pieces low-bits-first; each iteration peels DstBitSize bits
    // off the bottom of the value.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
11373
11374
132k
static bool isContractable(SDNode *N) {
11375
132k
  SDNodeFlags F = N->getFlags();
11376
132k
  return F.hasAllowContract() || 
F.hasAllowReassociation()131k
;
11377
132k
}
11378
11379
/// Try to perform FMA combining on a given FADD node.
/// Folds patterns such as (fadd (fmul x, y), z) -> (fma x, y, z), including
/// variants that look through FP_EXTEND and nested FMA nodes when the target
/// enables aggressive fusion. Returns the fused node, or an empty SDValue()
/// when no combine applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  // Fusion is allowed when explicitly requested globally (UnsafeFPMath /
  // FPOpFusion::Fast), when this node carries contract/reassoc flags, or
  // when the target has FMAD (which rounds intermediates anyway).
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Give way to the machine combiner when the target prefers to form FMAs
  // there instead of in the DAG combiner.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    // The one-use checks keep the original fma/fmul alive only when nothing
    // else needs them, so the combine does not duplicate work.
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }

    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}
11590
11591
/// Try to perform FMA combining on a given FSUB node.
/// Folds patterns such as (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) and
/// (fsub x, (fmul y, z)) -> (fma (fneg y), z, x), including variants that
/// look through FNEG, FP_EXTEND and nested FMA nodes when the target enables
/// aggressive fusion. Returns the fused node, or an empty SDValue() when no
/// combine applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  // Same fusion-permission logic as visitFADDForFMACombine: global options,
  // per-node contract/reassoc flags, or the presence of FMAD.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Give way to the machine combiner when the target prefers to form FMAs
  // there instead of in the DAG combiner.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    // The one-use checks keep the original fma/fmul alive only when nothing
    // else needs them, so the combine does not duplicate work.
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }

    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  return SDValue();
}
11879
11880
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
///
/// Returns the fused node on success, or an empty SDValue if no fold applies.
/// Only fires under no-infs FP math (see below) and when the target has a
/// profitable FMA/FMAD for this value type.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  // Flags from the original FMUL are propagated onto every node we build.
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // Without aggressive fusion, only fold when the FADD has a single use so
  // the original add is not kept alive alongside the new FMA.
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  // Try both operand orders; FMUL is commutative.
  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // Constant on the LHS of the FSUB: (c - x1) * y.
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      // Constant on the RHS of the FSUB: (x0 - c) * y.
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}
11974
11975
83.3k
/// Combine an ISD::FADD node. Tries constant folding, canonicalization,
/// fneg/fsub conversions, reassociation folds (guarded by fast-math flags),
/// and finally FADD -> FMA fusion. Returns the replacement value or an empty
/// SDValue if no fold applies. Fold order encodes priority — earlier folds
/// win.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  // (getNode constant-folds when both operands are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // isNegatibleForFree == 2 means the negation is cheaper than the original.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // True iff FMul is a single-use (fmul X, -2.0) — candidate for the
  // fsub/fadd rewrites below.
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if ((Options.UnsafeFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
12153
12154
15.7k
/// Combine an ISD::FSUB node. Tries constant folding, identity folds
/// (x - 0, x - x, -0.0 - x), fast-math reassociation against a nested FADD,
/// the fsub(A, fneg B) -> fadd(A, B) rewrite, and finally FSUB -> FMA fusion.
/// Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting -0.0 needs unsafe math or nsz: A - (-0.0) != A for A == -0.0.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Only valid with no-NaNs: NaN - NaN is NaN, not 0.0.
    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
  //       FSUB does not specify the sign bit of a NaN. Also note that for
  //       the same reason, the inverse transform is not safe, unless fast math
  //       flags are in play.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      // Prefer folding the negation into N1's expression when it is free.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
        return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // Reassociation folds against a nested FADD; require unsafe math, or
  // reassoc + nsz flags on this node.
  if ((Options.UnsafeFPMath ||
      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
      && N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
12230
12231
61.9k
/// Combine an ISD::FMUL node. Tries constant folding and canonicalization,
/// identity folds (x*1, x*0, x*2, x*-1), constant reassociation, fneg
/// cancellation, a select-based fabs pattern, and finally the distributive
/// FMUL -> FMA combine. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // x*0 folds need both nnan (NaN*0 is NaN) and nsz (-x*0 is -0.0).
  if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  // Only profitable when at least one negation is strictly cheaper
  // (isNegatibleForFree result == 2).
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
                                       ForCodeSize)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
                                         ForCodeSize)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations,
                                                ForCodeSize),
                           GetNegatedExpression(N1, DAG, LegalOperations,
                                                ForCodeSize),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select holds the SELECT and X the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Require a setcc comparing X against the constant 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // Less-than conditions: swap operands so the greater-than logic below
      // applies uniformly, then fall through.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
12376
12377
10.2k
/// Combine an ISD::FMA node. Tries constant folding, zero/one-operand
/// identities, canonicalization of a constant multiplicand to operand 1,
/// and (under unsafe/contractable math) constant reassociation with a
/// nested FMUL. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Note: scalar-only constant checks here (dyn_cast, not splat-aware).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // Unsafe folds also apply when this particular node is contractable.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  if (UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z; unsafe because it drops
    // NaN/inf propagation from the multiply.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // Only when the (possibly new) constant -K is representable cheaply.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
12482
12483
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
12484
// reciprocal.
12485
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12486
// Notice that this is not always beneficial. One reason is different targets
12487
// may have different costs for FDIV and FMUL, so sometimes the cost of two
12488
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12489
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12490
64.7k
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  const SDNodeFlags Flags = N->getFlags();
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Bail out if this node is already a reciprocal or a negated reciprocal;
  // rewriting it would just loop.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUseCount = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned SplatFactor = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    SplatFactor = VT.getVectorNumElements();

  if (!MinUseCount || (N1->use_size() * SplatFactor) < MinUseCount)
    return SDValue();

  // Collect every FDIV that divides by this same value. A SetVector is used
  // because the use list may contain duplicates.
  SetVector<SDNode *> FDivUsers;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // A division qualifies only under global unsafe math or when its own
      // flags permit reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        FDivUsers.insert(U);
    }
  }

  // Re-check the threshold against the number of divisions actually eligible.
  if ((FDivUsers.size() * SplatFactor) < MinUseCount)
    return SDValue();

  SDLoc DL(N);
  SDValue One = DAG.getConstantFP(1.0, DL, VT);
  SDValue Recip = DAG.getNode(ISD::FDIV, DL, VT, One, N1, Flags);

  // Rewrite each division: Dividend / Divisor -> Dividend * Reciprocal.
  for (auto *U : FDivUsers) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != One) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Recip, Flags);
      CombineTo(U, NewNode);
    } else if (U != Recip.getNode()) {
      // Without fast-math-flags this user is always the Reciprocal node
      // itself, but with FMF they may be distinct nodes.
      CombineTo(U, Recip);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
12556
12557
64.8k
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // Generic vector-operand folds first.
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Share one reciprocal among several divisions by the same divisor.
  if (SDValue CombinedDiv = combineRepeatedFPDivisors(N))
    return CombinedDiv;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> (fmul X, 1/c2) when losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &Divisor = N1CFP->getValueAPF();
      APFloat Recip(Divisor.getSemantics(), 1); // 1.0
      APFloat::opStatus Status =
          Recip.divide(Divisor, APFloat::rmNearestTiesToEven);
      // Only transform when the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((Status == APFloat::opOK || Status == APFloat::opInexact) &&
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue Est = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, Est, Flags);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same, looking through an fp_extend of the sqrt.
      if (SDValue Est = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                           Flags)) {
        Est = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, Est);
        AddToWorklist(Est.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, Est, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same, looking through an fp_round of the sqrt.
      if (SDValue Est = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                           Flags)) {
        Est = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, Est,
                          N1.getOperand(1));
        AddToWorklist(Est.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, Est, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue Est = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          Est = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Est, OtherOp, Flags);
          AddToWorklist(Est.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Est, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue Est = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(Est.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, Est, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
                                       ForCodeSize)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
                                         ForCodeSize)) {
      // Both can be negated for free; do it if at least one side is strictly
      // cheaper when negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations,
                                                ForCodeSize),
                           GetNegatedExpression(N1, DAG, LegalOperations,
                                                ForCodeSize),
                           Flags);
    }
  }

  return SDValue();
}
12673
12674
335
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS);
  ConstantFPSDNode *RHSC = dyn_cast<ConstantFPSDNode>(RHS);
  if (LHSC && RHSC)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, LHS, RHS, N->getFlags());

  // Try hoisting the operation into the arms of a select operand.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  return SDValue();
}
12690
12691
2.37k
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  // An approximation is allowed only under global unsafe math or the
  // node-level 'afn' (approximate functions) flag.
  SDNodeFlags Flags = N->getFlags();
  bool AllowApprox =
      DAG.getTarget().Options.UnsafeFPMath || Flags.hasApproximateFuncs();
  if (!AllowApprox)
    return SDValue();

  // If the target computes a real sqrt cheaply, an estimate sequence would
  // be a pessimization.
  SDValue Arg = N->getOperand(0);
  if (TLI.isFsqrtCheap(Arg, DAG))
    return SDValue();

  // FSQRT nodes have flags that propagate to the created nodes.
  return buildSqrtEstimate(Arg, Flags);
}
12704
12705
/// copysign(x, fp_extend(y)) -> copysign(x, y)
12706
/// copysign(x, fp_round(y)) -> copysign(x, y)
12707
3.26k
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
  // Only interesting when the sign operand is itself a precision conversion.
  SDValue Sign = N->getOperand(1);
  if (Sign.getOpcode() != ISD::FP_EXTEND &&
      Sign.getOpcode() != ISD::FP_ROUND)
    return false;

  // Do not optimize out a type conversion involving f128 yet.
  // For some targets like x86_64, configuration is changed to keep one f128
  // value in one SSE register, but instruction selection cannot handle
  // FCOPYSIGN on SSE registers yet.
  EVT SignVT = Sign->getValueType(0);
  EVT SignSrcVT = Sign->getOperand(0).getValueType();
  return SignVT == SignSrcVT || SignSrcVT != MVT::f128;
}
12721
12722
3.29k
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);

  // Constant fold when both operands are (splats of) FP constants.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (ConstantFPSDNode *SignC = isConstOrConstSplatFP(N->getOperand(1))) {
    const APFloat &SignVal = SignC->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!SignVal.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // The magnitude operand's own sign is irrelevant, so strip sign-only ops:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}
12768
12769
354
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExpC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExpC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExpC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExpC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
        !Flags.hasNoNaNs() || !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do
    // not turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool Is025 = ExpC->getValueAPF().isExactlyValue(0.25);
  bool Is075 = ExpC->getValueAPF().isExactlyValue(0.75);
  if (Is025 || Is075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform
    // (nsz only for the 0.25 case).
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if ((!Flags.hasNoSignedZeros() && Is025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (DAG.getMachineFunction().getFunction().hasOptSize())
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
    if (Is025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
  }

  return SDValue();
}
12844
12845
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  // Guarded by a function attribute: programs may rely on the
  // platform-specific (undefined) behavior of an overflowing float-to-int
  // conversion, which this fold would change.
  const Function &F = DAG.getMachineFunction().getFunction();
  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
  if (StrictOverflow.getValueAsString().equals("false"))
    return SDValue();

  // Require a legal FTRUNC (otherwise we'd likely be creating a libcall) and
  // permission to ignore -0.0: FTRUNC returns -0.0 for inputs in (-1.0, -0.0)
  // where the integer round-trip would produce +0.0.
  // FIXME: We should be able to use node-level FMF here.
  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
      !DAG.getTarget().Options.NoSignedZerosFPMath)
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue Conv = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP &&
      Conv.getOpcode() == ISD::FP_TO_SINT &&
      Conv.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, Conv.getOperand(0));

  if (N->getOpcode() == ISD::UINT_TO_FP &&
      Conv.getOpcode() == ISD::FP_TO_UINT &&
      Conv.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, Conv.getOperand(0));

  return SDValue();
}
12879
12880
99.7k
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (Src.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(Src) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Src);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, SrcVT) &&
      hasOperation(ISD::UINT_TO_FP, SrcVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(Src))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, Src);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    if (Src.getOpcode() == ISD::SETCC && Src.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue SelOps[] =
        { Src.getOperand(0), Src.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          Src.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, SelOps);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    if (Src.getOpcode() == ISD::ZERO_EXTEND &&
        Src.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue SelOps[] =
        { Src.getOperand(0).getOperand(0), Src.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          Src.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, SelOps);
    }
  }

  // Finally, try to eliminate an int->FP->int round trip.
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
12940
12941
35.0k
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (Src.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(Src) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, Src);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, SrcVT) &&
      hasOperation(ISD::SINT_TO_FP, SrcVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(Src))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Src);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    if (Src.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue SelOps[] =
        { Src.getOperand(0), Src.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          Src.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, SelOps);
    }
  }

  // Finally, try to eliminate an int->FP->int round trip.
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
12986
12987
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12988
17.5k
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // The float conversion can only be removed when the relevant integer range
  // is exactly representable in the intermediate FP type.
  if (APFloat::semanticsPrecision(FltSem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
13028
13029
11.7k
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint undef) -> undef
  if (Src.isUndef())
    return DAG.getUNDEF(VT);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(Src))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, Src);

  // Try to eliminate an int->FP->int round trip entirely.
  return FoldIntToFPToInt(N, DAG);
}
13043
13044
5.76k
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint undef) -> undef
  if (Src.isUndef())
    return DAG.getUNDEF(VT);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(Src))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, Src);

  // Try to eliminate an int->FP->int round trip entirely.
  return FoldIntToFPToInt(N, DAG);
}
13058
13059
7.44k
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0)) {
    (void)N0CFP;
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
  }

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND &&
      VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms
    // like x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue RoundedMag = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                                     N0.getOperand(0), N1);
    AddToWorklist(RoundedMag.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       RoundedMag, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13114
13115
0
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT InRegVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  // fold (fp_round_inreg c1fp) -> c1fp: materialize the constant at the
  // narrow type, then extend back to the result type.
  ConstantFPSDNode *SrcC = dyn_cast<ConstantFPSDNode>(Src);
  if (SrcC && isTypeLegal(InRegVT)) {
    SDLoc DL(N);
    SDValue Rounded =
        DAG.getConstantFP(*SrcC->getConstantFPValue(), DL, InRegVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Rounded);
  }

  return SDValue();
}
13130
13131
17.7k
// Combine an FP_EXTEND node. Returns the replacement value, or an empty
// SDValue when no fold applies.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  // Re-emitting the node lets getNode() constant-fold it.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // Only when the target can produce the wider result directly.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X (the "1" flag marks the round as value-preserving).
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    // Still narrower than the round's input: keep one (trunc-preserving) round.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original load get a value-preserving round of the
    // extending load; the chain users are rewired to the new load's chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
13183
13184
1.64k
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue Operand = N->getOperand(0);

  // fold (fceil c1) -> fceil(c1): re-emit the node so getNode() can
  // constant-fold it.
  if (isConstantFPBuildVectorOrConstantFP(Operand))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), N->getValueType(0), Operand);

  return SDValue();
}
13194
13195
1.65k
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1): re-emit so getNode() constant-folds it.
  if (isConstantFPBuildVectorOrConstantFP(Src))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, Src);

  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
  // likely to be generated to extract integer from a rounded floating value.
  unsigned SrcOpc = Src.getOpcode();
  if (SrcOpc == ISD::FRINT || SrcOpc == ISD::FTRUNC ||
      SrcOpc == ISD::FNEARBYINT || SrcOpc == ISD::FFLOOR ||
      SrcOpc == ISD::FCEIL)
    return Src;

  return SDValue();
}
13218
13219
1.80k
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue Operand = N->getOperand(0);

  // fold (ffloor c1) -> ffloor(c1): re-emit the node so getNode() can
  // constant-fold it.
  if (isConstantFPBuildVectorOrConstantFP(Operand))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), N->getValueType(0), Operand);

  return SDValue();
}
13229
13230
// FIXME: FNEG and FABS have a lot in common; refactor.
13231
8.29k
// Combine an FNEG node. Returns the replacement value, or an empty SDValue
// when no fold applies.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand's negation is free (e.g. it is already a negation, or a
  // constant we can flip), use the negated expression directly.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options, ForCodeSize))
    return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle a scalar-integer source; the FP side may still be a vector
    // bitcast into that scalar integer.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only worthwhile after legalization, and only if the negated constant
      // is cheap for the target to materialize.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
13288
13289
// Shared combine for the FP min/max node family. `Op` is the APFloat
// evaluator (minnum/maxnum/minimum/maximum) used for constant folding.
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
                            APFloat (*Op)(const APFloat &, const APFloat &)) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *LHSC = isConstOrConstSplatFP(LHS);
  const ConstantFPSDNode *RHSC = isConstOrConstSplatFP(RHS);

  // Both operands constant (or constant splats): evaluate now.
  if (LHSC && RHSC)
    return DAG.getConstantFP(Op(LHSC->getValueAPF(), RHSC->getValueAPF()),
                             SDLoc(N), VT);

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(LHS) &&
      !isConstantFPBuildVectorOrConstantFP(RHS))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, RHS, LHS);

  return SDValue();
}
13310
13311
2.46k
// Delegate FMINNUM folding to the shared min/max combiner, evaluating
// constants with APFloat's minnum.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}
13314
13315
2.33k
// Delegate FMAXNUM folding to the shared min/max combiner, evaluating
// constants with APFloat's maxnum.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}
13318
13319
385
// Delegate FMINIMUM folding to the shared min/max combiner, evaluating
// constants with APFloat's minimum.
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}
13322
13323
488
// Delegate FMAXIMUM folding to the shared min/max combiner, evaluating
// constants with APFloat's maximum.
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}
13326
13327
5.69k
// Combine an FABS node. Returns the replacement value, or an empty SDValue
// when no fold applies.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle a scalar-integer source; the FP side may still be a vector
    // bitcast into that scalar integer.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}
13369
13370
1.09M
// Combine a BRCOND node (operands: chain, condition, destination block).
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // Otherwise try to rebuild the condition as an explicit SETCC (handles
  // srl/trunc-of-and and xor patterns -- see rebuildSetCC).
  if (N1.hasOneUse()) {
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
  }

  return SDValue();
}
13398
13399
693k
// Rewrite a boolean-producing expression (typically a BRCOND condition) into
// an explicit SETCC where profitable. Returns an empty SDValue if no rewrite
// applies.
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // visitXOR returning N itself means an in-visit replacement may have
      // invalidated N; recover the live value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    if (N.getOpcode() != ISD::XOR)
      return N;

    SDNode *TheXor = N.getNode();

    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): this condition looks unsatisfiable -- a node matching
      // isOneConstant is a constant, never an XOR -- so the SETEQ path below
      // appears unreachable. Presumably the intent was the
      // br(xor(xor(x,y),1)) pattern; confirm before relying on it.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}
13494
13495
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13496
//
13497
519k
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}
13522
13523
/// Return true if 'Use' is a load or a store that uses N as its base pointer
13524
/// and that N may be folded in the load / store addressing mode.
13525
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  // Only plain (non-indexed) loads/stores whose base pointer is exactly N
  // are candidates; capture the memory type and address space for the query.
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

  // Translate the ADD/SUB base-pointer expression into a TargetLowering
  // addressing-mode description.
  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}
13569
13570
/// Try turning a load/store into a pre-indexed load/store when the base
13571
/// pointer is an add or subtract and it has other uses besides the load/store.
13572
/// After the transformation, the new indexed load/store has effectively folded
13573
/// the add/subtract in and all of its other uses are redirected to the
13574
/// new load/store.
13575
5.74M
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only run after DAG legalization; earlier the indexed forms may not exist.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any non-add/sub user of the base pointer means we cannot fold the
      // other uses, so abandon them all.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
13801
13802
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only produced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Extract the base pointer and memory VT from either a load or a store;
  // bail out if the node is already indexed or the target has no legal
  // post-inc/post-dec form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the memory operation is the pointer's only user there is no separate
  // add/sub to fold into a post-increment.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look through the other users of Ptr for an ADD/SUB we can fold.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // For a load the indexed node produces (value, writeback, chain);
        // for a store it produces (writeback, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
13926
13927
/// Return the base-pointer arithmetic from an indexed \p LD.
13928
3
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13929
3
  ISD::MemIndexedMode AM = LD->getAddressingMode();
13930
3
  assert(AM != ISD::UNINDEXED);
13931
3
  SDValue BP = LD->getOperand(1);
13932
3
  SDValue Inc = LD->getOperand(2);
13933
3
13934
3
  // Some backends use TargetConstants for load offsets, but don't expect
13935
3
  // TargetConstants in general ADD nodes. We can convert these constants into
13936
3
  // regular Constants (if the constant is not opaque).
13937
3
  assert((Inc.getOpcode() != ISD::TargetConstant ||
13938
3
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13939
3
         "Cannot split out indexing using opaque target constants");
13940
3
  if (Inc.getOpcode() == ISD::TargetConstant) {
13941
3
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13942
3
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13943
3
                          ConstInc->getValueType(0));
13944
3
  }
13945
3
13946
3
  unsigned Opc =
13947
3
      (AM == ISD::PRE_INC || 
AM == ISD::POST_INC0
? ISD::ADD :
ISD::SUB0
);
13948
3
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13949
3
}
13950
13951
3.28k
/// Return the number of vector elements of \p T, or zero when \p T is a
/// scalar type.
static inline int numVectorEltsOrZero(EVT T) {
  if (!T.isVector())
    return 0;
  return T.getVectorNumElements();
}
13954
13955
10.2k
/// Extract the value stored by \p ST into \p Val, converted to \p ST's memory
/// type. Returns true and sets \p Val on success; returns false when no
/// supported conversion (FTRUNC, integer TRUNCATE, or same-size bitcast)
/// can produce the in-memory value.
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  // Non-truncating store: the stored operand already is the memory value.
  if (STType == STMemType)
    return true;
  if (isTypeLegal(STMemType))
    return false; // fail.
  // Floating-point truncating store: model with FTRUNC if the target has it.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
      TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Integer truncating store with matching element counts: use TRUNCATE.
  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Same bit width: a bitcast is enough.
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}
13979
13980
445
/// Extend \p Val (which must have \p LD's memory type) to \p LD's result type
/// using the extension kind the load performs (any/sign/zero-extend, or a
/// bitcast for a non-extending load). Returns true and updates \p Val on
/// success; returns false for non-integer mismatched types.
bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
  EVT LDMemType = LD->getMemoryVT();
  EVT LDType = LD->getValueType(0);
  assert(Val.getValueType() == LDMemType &&
         "Attempting to extend value of non-matching type");
  // Non-extending load: the value is already in the result type.
  if (LDType == LDMemType)
    return true;
  if (LDMemType.isInteger() && LDType.isInteger()) {
    // Mirror the load's extension semantics on the forwarded value.
    switch (LD->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      Val = DAG.getBitcast(LDType, Val);
      return true;
    case ISD::EXTLOAD:
      Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::SEXTLOAD:
      Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::ZEXTLOAD:
      Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    }
  }
  return false;
}
14005
14006
2.81M
/// If \p LD loads from the same address that the store directly preceding it
/// on the chain wrote to, forward the stored value to the load (store-to-load
/// forwarding), inserting truncates/extends or an AND mask as needed. Returns
/// the replacement value on success, or an empty SDValue on failure.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
    return SDValue();
  // The store must be the load's immediate chain predecessor.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  if (!ST || ST->isVolatile())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // The two addresses must provably share a base so we can compute a
  // constant byte offset between them.
  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = (STMemType.getStoreSizeInBits() -
              LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value cover all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  // Replace LD with Val/Chain; for an indexed load also rebuild the
  // writeback address (base +/- increment) as the second result.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                             LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extenstions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // do/while(false) gives the failure paths a single 'continue' exit that
  // falls through to the dead-node cleanup below.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
14101
14102
2.86M
/// Main combine entry point for LOAD nodes: delete dead loads, split unused
/// indexed loads, forward a directly preceding store's value, refine
/// alignment, find a better chain, form pre/post-indexed loads, and slice
/// wide loads.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
14225
14226
namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A slice whose loaded type differs from the final type needs a zext,
      // unless the target considers that extension free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    uint64_t Offset = getOffsetFromBase();
    // An offset into the original load may reduce the usable alignment.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC =
        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
                           Use->getOperand(0)->isDivergent());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};

} // end anonymous namespace
14535
14536
/// Check that all bits set in \p UsedBits form a dense region, i.e.,
14537
/// \p UsedBits looks like 0..0 1..1 0..0.
14538
26
static bool areUsedBitsDense(const APInt &UsedBits) {
14539
26
  // If all the bits are one, this is dense!
14540
26
  if (UsedBits.isAllOnesValue())
14541
24
    return true;
14542
2
14543
2
  // Get rid of the unused bits on the right.
14544
2
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14545
2
  // Get rid of the unused bits on the left.
14546
2
  if (NarrowedUsedBits.countLeadingZeros())
14547
1
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14548
2
  // Check that the chunk of bits is completely used.
14549
2
  return NarrowedUsedBits.isAllOnesValue();
14550
2
}
14551
14552
/// Check whether or not \p First and \p Second are next to each other
14553
/// in memory. This means that there is no hole between the bits loaded
14554
/// by \p First and the bits loaded by \p Second.
14555
static bool areSlicesNextToEachOther(const LoadedSlice &First,
14556
2
                                     const LoadedSlice &Second) {
14557
2
  assert(First.Origin == Second.Origin && First.Origin &&
14558
2
         "Unable to match different memory origins.");
14559
2
  APInt UsedBits = First.getUsedBits();
14560
2
  assert((UsedBits & Second.getUsedBits()) == 0 &&
14561
2
         "Slices are not supposed to overlap.");
14562
2
  UsedBits |= Second.getUsedBits();
14563
2
  return areUsedBitsDense(UsedBits);
14564
2
}
14565
14566
/// Adjust the \p GlobalLSCost according to the target
/// paring capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // NOTE: the "First = Second" in the increment expression runs on every
  // iteration, including those that bail out with 'continue'. So a slice
  // that failed to pair as the second element of a pair still becomes the
  // first element of the next candidate pair, unless Second was explicitly
  // reset to nullptr below.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair was found: one of the two loads is folded away, so the
    // global cost loses one load (but keeps all other components).
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
14625
14626
/// Check the profitability of all involved LoadedSlice.
14627
/// Currently, it is considered profitable if there is exactly two
14628
/// involved slices (1) which are (2) next to each other in memory, and
14629
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14630
///
14631
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
14632
/// the elements themselves.
14633
///
14634
/// FIXME: When the cost model will be mature enough, we can relax
14635
/// constraints (1) and (2).
14636
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14637
433
                                const APInt &UsedBits, bool ForCodeSize) {
14638
433
  unsigned NumberOfSlices = LoadedSlices.size();
14639
433
  if (StressLoadSlicing)
14640
2
    return NumberOfSlices > 1;
14641
431
14642
431
  // Check (1).
14643
431
  if (NumberOfSlices != 2)
14644
407
    return false;
14645
24
14646
24
  // Check (2).
14647
24
  if (!areUsedBitsDense(UsedBits))
14648
2
    return false;
14649
22
14650
22
  // Check (3).
14651
22
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14652
22
  // The original code has one big load.
14653
22
  OrigCost.Loads = 1;
14654
66
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; 
++CurrSlice44
) {
14655
44
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
14656
44
    // Accumulate the cost of all the slices.
14657
44
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14658
44
    GlobalSlicingCost += SliceCost;
14659
44
14660
44
    // Account as cost in the original configuration the gain obtained
14661
44
    // with the current slices.
14662
44
    OrigCost.addSliceGain(LS);
14663
44
  }
14664
22
14665
22
  // If the target supports paired load, adjust the cost accordingly.
14666
22
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14667
22
  return OrigCost > GlobalSlicingCost;
14668
22
}
14669
14670
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// Atomic and volatile loads are rejected below, as are non-integer and
/// extending loads.
/// \returns true if the load was replaced by independent slices.
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Slicing only runs late: earlier phases could re-merge the slices.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr): if so, look through the shift and
    // remember the (constant) shift amount.
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr). Any other user makes the whole transformation invalid.
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may wrap the load in a zext; peel it to reach the load
    // itself, whose chain result we need below.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the chains of all the new loads together and replace the chain
  // result of the original load with that token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
14771
14772
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount (in bytes) of the masked region, as the pair
/// (MaskedBytes, ByteOffset).  Returns (0, 0) when the pattern does not
/// match or the narrowing would be unsafe.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) is the failure value; first==0 signals "no match" to callers.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load P), C).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Size of the masked-out region in bytes; only power-of-two byte counts
  // that correspond to legal narrow stores (i8/i16/i32) are supported.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so they are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
14841
14842
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo pair of (number of masked bytes, byte offset of the
///        masked region), as produced by CheckForMaskedLoad.
/// \returns the new narrower store, or an empty SDValue on failure.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return SDValue();

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian targets the low bytes sit at the low addresses, so the
  // byte shift translates directly to a pointer offset; on big-endian the
  // offset is measured from the other end of the value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
14900
14901
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
/// \returns the new (narrower) store on success, otherwise an empty SDValue.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return NewST;

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return NewST;
  }

  // From here on, only (or/xor/and (load P), constant) stored back to P is
  // handled.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, the interesting bits are the ones being cleared, so work with
    // the inverted mask; then Imm's set bits are exactly the bits the op
    // modifies for all three opcodes.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // The transformation is valid only if all modified bits fit inside the
    // NewBW-wide window starting at ShAmt.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load, apply the (narrowed) constant op, and store
      // the result back at the adjusted offset.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
15026
15027
/// For a given floating point load / store pair, if the load value isn't used
15028
/// by any other operations, then consider transforming the pair to integer
15029
/// load / store operations if the target deems the transformation profitable.
15030
3.22M
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15031
3.22M
  StoreSDNode *ST  = cast<StoreSDNode>(N);
15032
3.22M
  SDValue Value = ST->getValue();
15033
3.22M
  if (ISD::isNormalStore(ST) && 
ISD::isNormalLoad(Value.getNode())2.91M
&&
15034
3.22M
      
Value.hasOneUse()492k
) {
15035
410k
    LoadSDNode *LD = cast<LoadSDNode>(Value);
15036
410k
    EVT VT = LD->getMemoryVT();
15037
410k
    if (!VT.isFloatingPoint() ||
15038
410k
        
VT != ST->getMemoryVT()165k
||
15039
410k
        
LD->isNonTemporal()165k
||
15040
410k
        
ST->isNonTemporal()164k
||
15041
410k
        
LD->getPointerInfo().getAddrSpace() != 0164k
||
15042
410k
        
ST->getPointerInfo().getAddrSpace() != 0163k
)
15043
247k
      return SDValue();
15044
163k
15045
163k
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15046
163k
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15047
163k
        
!TLI.isOperationLegal(ISD::STORE, IntVT)2.09k
||
15048
163k
        
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT)2.09k
||
15049
163k
        
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)53
)
15050
162k
      return SDValue();
15051
53
15052
53
    unsigned LDAlign = LD->getAlignment();
15053
53
    unsigned STAlign = ST->getAlignment();
15054
53
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15055
53
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15056
53
    if (LDAlign < ABIAlign || 
STAlign < ABIAlign40
)
15057
13
      return SDValue();
15058
40
15059
40
    SDValue NewLD =
15060
40
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15061
40
                    LD->getPointerInfo(), LDAlign);
15062
40
15063
40
    SDValue NewST =
15064
40
        DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15065
40
                     ST->getPointerInfo(), STAlign);
15066
40
15067
40
    AddToWorklist(NewLD.getNode());
15068
40
    AddToWorklist(NewST.getNode());
15069
40
    WorklistRemover DeadNodes(*this);
15070
40
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15071
40
    ++LdStFP2Int;
15072
40
    return NewST;
15073
40
  }
15074
2.81M
15075
2.81M
  return SDValue();
15076
2.81M
}
15077
15078
// This is a helper function for visitMUL to check the profitability
15079
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15080
// MulNode is the original multiply, AddNode is (add x, c1),
15081
// and ConstNode is c2.
15082
//
15083
// If the (add x, c1) has multiple uses, we could increase
15084
// the number of adds if we make this transformation.
15085
// It would only be worth doing this if we can remove a
15086
// multiply in the process. Check for that here.
15087
// To illustrate:
15088
//     (A + c1) * c3
15089
//     (A + c2) * c3
15090
// We're checking for cases where we have common "c3 * A" expressions.
15091
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15092
                                              SDValue &AddNode,
15093
459
                                              SDValue &ConstNode) {
15094
459
  APInt Val;
15095
459
15096
459
  // If the add only has one use, this would be OK to do.
15097
459
  if (AddNode.getNode()->hasOneUse())
15098
364
    return true;
15099
95
15100
95
  // Walk all the users of the constant with which we're multiplying.
15101
177
  
for (SDNode *Use : ConstNode->uses())95
{
15102
177
    if (Use == MulNode) // This use is the one we're on right now. Skip it.
15103
84
      continue;
15104
93
15105
93
    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15106
70
      SDNode *OtherOp;
15107
70
      SDNode *MulVar = AddNode.getOperand(0).getNode();
15108
70
15109
70
      // OtherOp is what we're multiplying against the constant.
15110
70
      if (Use->getOperand(0) == ConstNode)
15111
0
        OtherOp = Use->getOperand(1).getNode();
15112
70
      else
15113
70
        OtherOp = Use->getOperand(0).getNode();
15114
70
15115
70
      // Check to see if multiply is with the same operand of our "add".
15116
70
      //
15117
70
      //     ConstNode  = CONST
15118
70
      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15119
70
      //     ...
15120
70
      //     AddNode  = (A + c1)  <-- MulVar is A.
15121
70
      //         = AddNode * ConstNode   <-- current visiting instruction.
15122
70
      //
15123
70
      // If we make this transformation, we will have a common
15124
70
      // multiply (ConstNode * A) that we can save.
15125
70
      if (OtherOp == MulVar)
15126
15
        return true;
15127
55
15128
55
      // Now check to see if a future expansion will give us a common
15129
55
      // multiply.
15130
55
      //
15131
55
      //     ConstNode  = CONST
15132
55
      //     AddNode    = (A + c1)
15133
55
      //     ...   = AddNode * ConstNode <-- current visiting instruction.
15134
55
      //     ...
15135
55
      //     OtherOp = (A + c2)
15136
55
      //     Use     = OtherOp * ConstNode <-- visiting Use.
15137
55
      //
15138
55
      // If we make this transformation, we will have a common
15139
55
      // multiply (CONST * A) after we also do the same transformation
15140
55
      // to the "t2" instruction.
15141
55
      if (OtherOp->getOpcode() == ISD::ADD &&
15142
55
          
DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1))40
&&
15143
55
          
OtherOp->getOperand(0).getNode() == MulVar30
)
15144
0
        return true;
15145
55
    }
15146
93
  }
15147
95
15148
95
  // Didn't find a case where this would be profitable.
15149
95
  
return false80
;
15150
95
}
15151
15152
/// Build a TokenFactor over the distinct incoming chains of the first
/// \p NumStores entries of \p StoreNodes, so the merged store can depend
/// on all of them at once.
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         unsigned NumStores) {
  SmallVector<SDValue, 8> Chains;
  SmallPtrSet<const SDNode *, 8> Visited;
  SDLoc StoreDL(StoreNodes[0].MemNode);

  // Seed the visited set with every store being merged, so a chain that
  // points at one of them is recognized as internal and skipped.
  for (unsigned Idx = 0; Idx < NumStores; ++Idx)
    Visited.insert(StoreNodes[Idx].MemNode);

  // Collect each distinct external chain: skip chains that are one of the
  // merged stores themselves or that were already recorded.
  for (unsigned Idx = 0; Idx < NumStores; ++Idx) {
    SDValue StoreChain = StoreNodes[Idx].MemNode->getChain();
    if (Visited.insert(StoreChain.getNode()).second)
      Chains.push_back(StoreChain);
  }

  assert(Chains.size() > 0 && "Chain should have generated a chain");
  return DAG.getTokenFactor(StoreDL, Chains);
}
15171
15172
/// Replace NumStores consecutive stores (StoreNodes[0..NumStores-1], sorted by
/// offset with StoreNodes[0] at the lowest address) with one wide store.
/// \param MemVT         memory type of each individual store being merged.
/// \param IsConstantSrc true when the stored values are constants
///                      (ConstantSDNode / ConstantFPSDNode).
/// \param UseVector     build the merged value as a vector; otherwise pack the
///                      constants into a single wide integer.
/// \param UseTrunc      emit a truncating store of a promoted integer (used
///                      when the wide integer type itself is not legal).
/// \returns true if the merge was performed (the old stores are replaced via
///          CombineTo).
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Pick the merged store type: a wider vector of the element's scalar type,
  // or a single integer covering all of the stored bits.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands into a BUILD_VECTOR (or CONCAT_VECTORS
      // when each store was itself a vector).
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              // Resize the integer constant to the element's store size
              // before bitcasting it to MemVT below.
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure correctly size type is the correct type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Scalar types already match: re-extract directly at type MemVT,
            // switching between element and subvector extraction as needed.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets the lowest-addressed store supplies the
      // least-significant bits, so pack the elements in reverse order (each
      // shift pushes earlier elements toward the high bits).
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        // (Bailing out here is safe even though StoreInt was already updated:
        // returning false discards the local accumulator.)
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    // NOTE(review): unlike the trunc-store path below, this call does not
    // forward FirstInChain's MemOperand flags — confirm whether flags such as
    // nontemporal should propagate to the merged store.
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    // UseTrunc is only produced on the packed-integer-constant path, so the
    // merged value is necessarily a ConstantSDNode here.
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
15328
15329
/// Collect into StoreNodes every store that could be merged with St: same kind
/// of stored value (load / constant / extracted vector element), a compatible
/// memory type, and an address differing from St's only by a constant offset
/// (recorded in each MemOpLink). RootNode is set to the chain node whose users
/// were scanned; the caller later uses it to prune dependency searches.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the stored value; exactly one of these drives the matching
  // rules in CandidateMatch below.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile/indexed.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Predicate deciding whether Other can join the merge group; on success it
  // also computes Other's address (Ptr) and its byte offset from St (Offset).
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    // The memory operands must not be volatile/indexed.
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    // Don't mix temporal stores with non-temporal stores.
    if (St->isNonTemporal() != Other->isNonTemporal())
      return false;
    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile/indexed.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        // Don't mix temporal loads with non-temporal loads.
        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
          return false;
        // Both loads must hang off the same base pointer as St's load.
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(OtherBC.getValueType()))
        return false;
      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    // Finally the address must share St's base/index, differing only by a
    // constant offset, which is reported back through Offset.
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2. or Store3 where the root is not a load
  // which always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  // Cap the use-list walk at 1024 nodes to bound compile time on chains with
  // very many users.
  unsigned NumNodesExplored = 0;
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St's chain is a load: climb one level to the load's chain, then look
    // down through sibling loads for stores fed by them.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise scan the direct chain users of the root for matching stores.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
15464
15465
// We need to check that merging these stores does not cause a loop in
15466
// the DAG. Any store candidate may depend on another candidate
15467
// indirectly through its operand (we already consider dependencies
15468
// through the chain). Check in parallel by searching up from
15469
// non-chain operands of candidates.
15470
/// Return true when the first NumStores candidates in StoreNodes can be merged
/// without introducing a cycle in the DAG, i.e. no candidate store is reachable
/// from another candidate's non-chain operands. RootNode (and any TokenFactors
/// reachable from it) is pre-seeded as visited so the upward search stops there.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplfication.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    // Expand TokenFactors so every chain feeding the root is also treated as
    // a known-safe boundary for the predecessor search below.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                    in candidate selection and can be
    //                    safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily fromt the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  // Visited/Worklist are shared across iterations, so work done for one
  // candidate is not repeated for the next; hitting the Max budget makes
  // hasPredecessorHelper answer conservatively.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max))
      return false;
  return true;
}
15522
15523
2.54M
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15524
2.54M
  if (OptLevel == CodeGenOpt::None)
15525
46.6k
    return false;
15526
2.49M
15527
2.49M
  EVT MemVT = St->getMemoryVT();
15528
2.49M
  int64_t ElementSizeBytes = MemVT.getStoreSize();
15529
2.49M
  unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements()699k
:
11.79M
;
15530
2.49M
15531
2.49M
  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15532
822k
    return false;
15533
1.67M
15534
1.67M
  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15535
1.67M
      Attribute::NoImplicitFloat);
15536
1.67M
15537
1.67M
  // This function cannot currently deal with non-byte-sized memory sizes.
15538
1.67M
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15539
21.9k
    return false;
15540
1.65M
15541
1.65M
  if (!MemVT.isSimple())
15542
1.98k
    return false;
15543
1.65M
15544
1.65M
  // Perform an early exit check. Do not bother looking at stored values that
15545
1.65M
  // are not constants, loads, or extracted vector elements.
15546
1.65M
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
15547
1.65M
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15548
1.65M
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15549
1.65M
                       
isa<ConstantFPSDNode>(StoredVal)1.21M
;
15550
1.65M
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15551
1.65M
                          
StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR1.62M
);
15552
1.65M
  bool IsNonTemporalStore = St->isNonTemporal();
15553
1.65M
  bool IsNonTemporalLoad =
15554
1.65M
      IsLoadSrc && 
cast<LoadSDNode>(StoredVal)->isNonTemporal()407k
;
15555
1.65M
15556
1.65M
  if (!IsConstantSrc && 
!IsLoadSrc1.21M
&&
!IsExtractVecSrc802k
)
15557
778k
    return false;
15558
871k
15559
871k
  SmallVector<MemOpLink, 8> StoreNodes;
15560
871k
  SDNode *RootNode;
15561
871k
  // Find potential store merge candidates by searching through chain sub-DAG
15562
871k
  getStoreMergeCandidates(St, StoreNodes, RootNode);
15563
871k
15564
871k
  // Check if there is anything to merge.
15565
871k
  if (StoreNodes.size() < 2)
15566
603k
    return false;
15567
267k
15568
267k
  // Sort the memory operands according to their distance from the
15569
267k
  // base pointer.
15570
4.11M
  
llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) 267k
{
15571
4.11M
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
15572
4.11M
  });
15573
267k
15574
267k
  // Store Merge attempts to merge the lowest stores. This generally
15575
267k
  // works out as if successful, as the remaining stores are checked
15576
267k
  // after the first collection of stores is merged. However, in the
15577
267k
  // case that a non-mergeable store is found first, e.g., {p[-2],
15578
267k
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15579
267k
  // mergeable cases. To prevent this, we prune such stores from the
15580
267k
  // front of StoreNodes here.
15581
267k
15582
267k
  bool RV = false;
15583
487k
  while (StoreNodes.size() > 1) {
15584
319k
    unsigned StartIdx = 0;
15585
638k
    while ((StartIdx + 1 < StoreNodes.size()) &&
15586
638k
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15587
539k
               StoreNodes[StartIdx + 1].OffsetFromBase)
15588
319k
      ++StartIdx;
15589
319k
15590
319k
    // Bail if we don't have enough candidates to merge.
15591
319k
    if (StartIdx + 1 >= StoreNodes.size())
15592
99.2k
      return RV;
15593
219k
15594
219k
    if (StartIdx)
15595
44.3k
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15596
219k
15597
219k
    // Scan the memory operations on the chain and find the first
15598
219k
    // non-consecutive store memory address.
15599
219k
    unsigned NumConsecutiveStores = 1;
15600
219k
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15601
219k
    // Check that the addresses are consecutive starting from the second
15602
219k
    // element in the list of stores.
15603
895k
    for (unsigned i = 1, e = StoreNodes.size(); i < e; 
++i675k
) {
15604
753k
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15605
753k
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15606
77.6k
        break;
15607
675k
      NumConsecutiveStores = i + 1;
15608
675k
    }
15609
219k
15610
219k
    if (NumConsecutiveStores < 2) {
15611
0
      StoreNodes.erase(StoreNodes.begin(),
15612
0
                       StoreNodes.begin() + NumConsecutiveStores);
15613
0
      continue;
15614
0
    }
15615
219k
15616
219k
    // The node with the lowest store address.
15617
219k
    LLVMContext &Context = *DAG.getContext();
15618
219k
    const DataLayout &DL = DAG.getDataLayout();
15619
219k
15620
219k
    // Store the constants into memory as one consecutive store.
15621
219k
    if (IsConstantSrc) {
15622
385k
      while (NumConsecutiveStores >= 2) {
15623
205k
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15624
205k
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15625
205k
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
15626
205k
        unsigned LastLegalType = 1;
15627
205k
        unsigned LastLegalVectorType = 1;
15628
205k
        bool LastIntegerTrunc = false;
15629
205k
        bool NonZero = false;
15630
205k
        unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15631
778k
        for (unsigned i = 0; i < NumConsecutiveStores; 
++i572k
) {
15632
603k
          StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15633
603k
          SDValue StoredVal = ST->getValue();
15634
603k
          bool IsElementZero = false;
15635
603k
          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15636
602k
            IsElementZero = C->isNullValue();
15637
1.13k
          else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15638
1.12k
            IsElementZero = C->getConstantFPValue()->isNullValue();
15639
603k
          if (IsElementZero) {
15640
493k
            if (NonZero && 
FirstZeroAfterNonZero == NumConsecutiveStores55.4k
)
15641
31.0k
              FirstZeroAfterNonZero = i;
15642
493k
          }
15643
603k
          NonZero |= !IsElementZero;
15644
603k
15645
603k
          // Find a legal type for the constant store.
15646
603k
          unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15647
603k
          EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15648
603k
          bool IsFast = false;
15649
603k
15650
603k
          // Break early when size is too large to be legal.
15651
603k
          if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15652
30.6k
            break;
15653
572k
15654
572k
          if (TLI.isTypeLegal(StoreTy) &&
15655
572k
              
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG)216k
&&
15656
572k
              TLI.allowsMemoryAccess(Context, DL, StoreTy,
15657
215k
                                     *FirstInChain->getMemOperand(), &IsFast) &&
15658
572k
              
IsFast208k
) {
15659
208k
            LastIntegerTrunc = false;
15660
208k
            LastLegalType = i + 1;
15661
208k
            // Or check whether a truncstore is legal.
15662
363k
          } else if (TLI.getTypeAction(Context, StoreTy) ==
15663
363k
                     TargetLowering::TypePromoteInteger) {
15664
116k
            EVT LegalizedStoredValTy =
15665
116k
                TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15666
116k
            if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15667
116k
                
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG)6.24k
&&
15668
116k
                TLI.allowsMemoryAccess(Context, DL, StoreTy,
15669
6.24k
                                       *FirstInChain->getMemOperand(),
15670
6.24k
                                       &IsFast) &&
15671
116k
                
IsFast4.64k
) {
15672
4.64k
              LastIntegerTrunc = true;
15673
4.64k
              LastLegalType = i + 1;
15674
4.64k
            }
15675
116k
          }
15676
572k
15677
572k
          // We only use vectors if the constant is known to be zero or the
15678
572k
          // target allows it and the function is not marked with the
15679
572k
          // noimplicitfloat attribute.
15680
572k
          if ((!NonZero ||
15681
572k
               
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)153k
) &&
15682
572k
              
!NoVectors458k
) {
15683
458k
            // Find a legal type for the vector store.
15684
458k
            unsigned Elts = (i + 1) * NumMemElts;
15685
458k
            EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15686
458k
            if (TLI.isTypeLegal(Ty) && 
TLI.isTypeLegal(MemVT)142k
&&
15687
458k
                
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG)142k
&&
15688
458k
                TLI.allowsMemoryAccess(
15689
44.7k
                    Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15690
458k
                
IsFast40.3k
)
15691
6.81k
              LastLegalVectorType = i + 1;
15692
458k
          }
15693
572k
        }
15694
205k
15695
205k
        bool UseVector = (LastLegalVectorType > LastLegalType) && 
!NoVectors1.97k
;
15696
205k
        unsigned NumElem = (UseVector) ? 
LastLegalVectorType1.97k
:
LastLegalType203k
;
15697
205k
15698
205k
        // Check if we found a legal integer type that creates a meaningful
15699
205k
        // merge.
15700
205k
        if (NumElem < 2) {
15701
194k
          // We know that candidate stores are in order and of correct
15702
194k
          // shape. While there is no mergeable sequence from the
15703
194k
          // beginning one may start later in the sequence. The only
15704
194k
          // reason a merge of size N could have failed where another of
15705
194k
          // the same size would not have, is if the alignment has
15706
194k
          // improved or we've dropped a non-zero value. Drop as many
15707
194k
          // candidates as we can here.
15708
194k
          unsigned NumSkip = 1;
15709
194k
          while (
15710
615k
              (NumSkip < NumConsecutiveStores) &&
15711
615k
              
(NumSkip < FirstZeroAfterNonZero)477k
&&
15712
615k
              
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)449k
)
15713
420k
            NumSkip++;
15714
194k
15715
194k
          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15716
194k
          NumConsecutiveStores -= NumSkip;
15717
194k
          continue;
15718
194k
        }
15719
10.3k
15720
10.3k
        // Check that we can merge these candidates without causing a cycle.
15721
10.3k
        if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15722
10.3k
                                                      RootNode)) {
15723
0
          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15724
0
          NumConsecutiveStores -= NumElem;
15725
0
          continue;
15726
0
        }
15727
10.3k
15728
10.3k
        RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15729
10.3k
                                              UseVector, LastIntegerTrunc);
15730
10.3k
15731
10.3k
        // Remove merged stores for next iteration.
15732
10.3k
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15733
10.3k
        NumConsecutiveStores -= NumElem;
15734
10.3k
      }
15735
180k
      continue;
15736
180k
    }
15737
39.1k
15738
39.1k
    // When extracting multiple vector elements, try to store them
15739
39.1k
    // in one vector store rather than a sequence of scalar stores.
15740
39.1k
    if (IsExtractVecSrc) {
15741
3.23k
      // Loop on Consecutive Stores on success.
15742
6.56k
      while (NumConsecutiveStores >= 2) {
15743
3.33k
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15744
3.33k
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15745
3.33k
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
15746
3.33k
        unsigned NumStoresToMerge = 1;
15747
15.1k
        for (unsigned i = 0; i < NumConsecutiveStores; 
++i11.8k
) {
15748
12.6k
          // Find a legal type for the vector store.
15749
12.6k
          unsigned Elts = (i + 1) * NumMemElts;
15750
12.6k
          EVT Ty =
15751
12.6k
              EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15752
12.6k
          bool IsFast;
15753
12.6k
15754
12.6k
          // Break early when size is too large to be legal.
15755
12.6k
          if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15756
851
            break;
15757
11.8k
15758
11.8k
          if (TLI.isTypeLegal(Ty) &&
15759
11.8k
              
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG)2.65k
&&
15760
11.8k
              TLI.allowsMemoryAccess(Context, DL, Ty,
15761
2.28k
                                     *FirstInChain->getMemOperand(), &IsFast) &&
15762
11.8k
              
IsFast663
)
15763
309
            NumStoresToMerge = i + 1;
15764
11.8k
        }
15765
3.33k
15766
3.33k
        // Check if we found a legal integer type creating a meaningful
15767
3.33k
        // merge.
15768
3.33k
        if (NumStoresToMerge < 2) {
15769
3.09k
          // We know that candidate stores are in order and of correct
15770
3.09k
          // shape. While there is no mergeable sequence from the
15771
3.09k
          // beginning one may start later in the sequence. The only
15772
3.09k
          // reason a merge of size N could have failed where another of
15773
3.09k
          // the same size would not have, is if the alignment has
15774
3.09k
          // improved. Drop as many candidates as we can here.
15775
3.09k
          unsigned NumSkip = 1;
15776
3.09k
          while (
15777
20.9k
              (NumSkip < NumConsecutiveStores) &&
15778
20.9k
              
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)17.8k
)
15779
17.8k
            NumSkip++;
15780
3.09k
15781
3.09k
          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15782
3.09k
          NumConsecutiveStores -= NumSkip;
15783
3.09k
          continue;
15784
3.09k
        }
15785
233
15786
233
        // Check that we can merge these candidates without causing a cycle.
15787
233
        if (!checkMergeStoreCandidatesForDependencies(
15788
233
                StoreNodes, NumStoresToMerge, RootNode)) {
15789
0
          StoreNodes.erase(StoreNodes.begin(),
15790
0
                           StoreNodes.begin() + NumStoresToMerge);
15791
0
          NumConsecutiveStores -= NumStoresToMerge;
15792
0
          continue;
15793
0
        }
15794
233
15795
233
        RV |= MergeStoresOfConstantsOrVecElts(
15796
233
            StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15797
233
15798
233
        StoreNodes.erase(StoreNodes.begin(),
15799
233
                         StoreNodes.begin() + NumStoresToMerge);
15800
233
        NumConsecutiveStores -= NumStoresToMerge;
15801
233
      }
15802
3.23k
      continue;
15803
3.23k
    }
15804
35.8k
15805
35.8k
    // Below we handle the case of multiple consecutive stores that
15806
35.8k
    // come from multiple consecutive loads. We merge them into a single
15807
35.8k
    // wide load and a single wide store.
15808
35.8k
15809
35.8k
    // Look for load nodes which are used by the stored values.
15810
35.8k
    SmallVector<MemOpLink, 8> LoadNodes;
15811
35.8k
15812
35.8k
    // Find acceptable loads. Loads need to have the same chain (token factor),
15813
35.8k
    // must not be zext, volatile, indexed, and they must be consecutive.
15814
35.8k
    BaseIndexOffset LdBasePtr;
15815
35.8k
15816
237k
    for (unsigned i = 0; i < NumConsecutiveStores; 
++i201k
) {
15817
201k
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15818
201k
      SDValue Val = peekThroughBitcasts(St->getValue());
15819
201k
      LoadSDNode *Ld = cast<LoadSDNode>(Val);
15820
201k
15821
201k
      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15822
201k
      // If this is not the first ptr that we check.
15823
201k
      int64_t LdOffset = 0;
15824
201k
      if (LdBasePtr.getBase().getNode()) {
15825
165k
        // The base ptr must be the same.
15826
165k
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15827
0
          break;
15828
35.8k
      } else {
15829
35.8k
        // Check that all other base pointers are the same as this one.
15830
35.8k
        LdBasePtr = LdPtr;
15831
35.8k
      }
15832
201k
15833
201k
      // We found a potential memory operand to merge.
15834
201k
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15835
201k
    }
15836
35.8k
15837
71.5k
    while (NumConsecutiveStores >= 2 && 
LoadNodes.size() >= 236.6k
) {
15838
36.6k
      // If we have load/store pair instructions and we only have two values,
15839
36.6k
      // don't bother merging.
15840
36.6k
      unsigned RequiredAlignment;
15841
36.6k
      if (LoadNodes.size() == 2 &&
15842
36.6k
          
TLI.hasPairedLoad(MemVT, RequiredAlignment)14.7k
&&
15843
36.6k
          
StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment912
) {
15844
912
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15845
912
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15846
912
        break;
15847
912
      }
15848
35.6k
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15849
35.6k
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15850
35.6k
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
15851
35.6k
      LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15852
35.6k
      unsigned FirstLoadAlign = FirstLoad->getAlignment();
15853
35.6k
15854
35.6k
      // Scan the memory operations on the chain and find the first
15855
35.6k
      // non-consecutive load memory address. These variables hold the index in
15856
35.6k
      // the store node array.
15857
35.6k
15858
35.6k
      unsigned LastConsecutiveLoad = 1;
15859
35.6k
15860
35.6k
      // This variable refers to the size and not index in the array.
15861
35.6k
      unsigned LastLegalVectorType = 1;
15862
35.6k
      unsigned LastLegalIntegerType = 1;
15863
35.6k
      bool isDereferenceable = true;
15864
35.6k
      bool DoIntegerTruncate = false;
15865
35.6k
      StartAddress = LoadNodes[0].OffsetFromBase;
15866
35.6k
      SDValue FirstChain = FirstLoad->getChain();
15867
151k
      for (unsigned i = 1; i < LoadNodes.size(); 
++i115k
) {
15868
133k
        // All loads must share the same chain.
15869
133k
        if (LoadNodes[i].MemNode->getChain() != FirstChain)
15870
2.10k
          break;
15871
131k
15872
131k
        int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15873
131k
        if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15874
1.95k
          break;
15875
129k
        LastConsecutiveLoad = i;
15876
129k
15877
129k
        if (isDereferenceable && 
!LoadNodes[i].MemNode->isDereferenceable()38.4k
)
15878
24.7k
          isDereferenceable = false;
15879
129k
15880
129k
        // Find a legal type for the vector store.
15881
129k
        unsigned Elts = (i + 1) * NumMemElts;
15882
129k
        EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15883
129k
15884
129k
        // Break early when size is too large to be legal.
15885
129k
        if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15886
13.3k
          break;
15887
115k
15888
115k
        bool IsFastSt, IsFastLd;
15889
115k
        if (TLI.isTypeLegal(StoreTy) &&
15890
115k
            
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG)20.5k
&&
15891
115k
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
15892
13.7k
                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
15893
115k
            
IsFastSt12.1k
&&
15894
115k
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
15895
675
                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
15896
115k
            
IsFastLd569
) {
15897
375
          LastLegalVectorType = i + 1;
15898
375
        }
15899
115k
15900
115k
        // Find a legal type for the integer store.
15901
115k
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15902
115k
        StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15903
115k
        if (TLI.isTypeLegal(StoreTy) &&
15904
115k
            
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG)11.7k
&&
15905
115k
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
15906
11.7k
                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
15907
115k
            
IsFastSt209
&&
15908
115k
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
15909
209
                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
15910
115k
            
IsFastLd209
) {
15911
209
          LastLegalIntegerType = i + 1;
15912
209
          DoIntegerTruncate = false;
15913
209
          // Or check whether a truncstore and extload is legal.
15914
115k
        } else if (TLI.getTypeAction(Context, StoreTy) ==
15915
115k
                   TargetLowering::TypePromoteInteger) {
15916
70.5k
          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15917
70.5k
          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15918
70.5k
              
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG)1.42k
&&
15919
70.5k
              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15920
1.42k
                                 StoreTy) &&
15921
70.5k
              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15922
1.42k
                                 StoreTy) &&
15923
70.5k
              
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy)1.41k
&&
15924
70.5k
              TLI.allowsMemoryAccess(Context, DL, StoreTy,
15925
1.41k
                                     *FirstInChain->getMemOperand(),
15926
1.41k
                                     &IsFastSt) &&
15927
70.5k
              
IsFastSt31
&&
15928
70.5k
              TLI.allowsMemoryAccess(Context, DL, StoreTy,
15929
31
                                     *FirstLoad->getMemOperand(), &IsFastLd) &&
15930
70.5k
              
IsFastLd31
) {
15931
31
            LastLegalIntegerType = i + 1;
15932
31
            DoIntegerTruncate = true;
15933
31
          }
15934
70.5k
        }
15935
115k
      }
15936
35.6k
15937
35.6k
      // Only use vector types if the vector type is larger than the integer
15938
35.6k
      // type. If they are the same, use integers.
15939
35.6k
      bool UseVectorTy =
15940
35.6k
          LastLegalVectorType > LastLegalIntegerType && 
!NoVectors324
;
15941
35.6k
      unsigned LastLegalType =
15942
35.6k
          std::max(LastLegalVectorType, LastLegalIntegerType);
15943
35.6k
15944
35.6k
      // We add +1 here because the LastXXX variables refer to location while
15945
35.6k
      // the NumElem refers to array/index size.
15946
35.6k
      unsigned NumElem =
15947
35.6k
          std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15948
35.6k
      NumElem = std::min(LastLegalType, NumElem);
15949
35.6k
15950
35.6k
      if (NumElem < 2) {
15951
35.1k
        // We know that candidate stores are in order and of correct
15952
35.1k
        // shape. While there is no mergeable sequence from the
15953
35.1k
        // beginning one may start later in the sequence. The only
15954
35.1k
        // reason a merge of size N could have failed where another of
15955
35.1k
        // the same size would not have is if the alignment or either
15956
35.1k
        // the load or store has improved. Drop as many candidates as we
15957
35.1k
        // can here.
15958
35.1k
        unsigned NumSkip = 1;
15959
197k
        while ((NumSkip < LoadNodes.size()) &&
15960
197k
               
(LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign)163k
&&
15961
197k
               
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)163k
)
15962
162k
          NumSkip++;
15963
35.1k
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15964
35.1k
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15965
35.1k
        NumConsecutiveStores -= NumSkip;
15966
35.1k
        continue;
15967
35.1k
      }
15968
489
15969
489
      // Check that we can merge these candidates without causing a cycle.
15970
489
      if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15971
489
                                                    RootNode)) {
15972
0
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15973
0
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15974
0
        NumConsecutiveStores -= NumElem;
15975
0
        continue;
15976
0
      }
15977
489
15978
489
      // Find if it is better to use vectors or integers to load and store
15979
489
      // to memory.
15980
489
      EVT JointMemOpVT;
15981
489
      if (UseVectorTy) {
15982
324
        // Find a legal type for the vector store.
15983
324
        unsigned Elts = NumElem * NumMemElts;
15984
324
        JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15985
324
      } else {
15986
165
        unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15987
165
        JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15988
165
      }
15989
489
15990
489
      SDLoc LoadDL(LoadNodes[0].MemNode);
15991
489
      SDLoc StoreDL(StoreNodes[0].MemNode);
15992
489
15993
489
      // The merged loads are required to have the same incoming chain, so
15994
489
      // using the first's chain is acceptable.
15995
489
15996
489
      SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15997
489
      AddToWorklist(NewStoreChain.getNode());
15998
489
15999
489
      MachineMemOperand::Flags LdMMOFlags =
16000
489
          isDereferenceable ? 
MachineMemOperand::MODereferenceable72
16001
489
                            : 
MachineMemOperand::MONone417
;
16002
489
      if (IsNonTemporalLoad)
16003
168
        LdMMOFlags |= MachineMemOperand::MONonTemporal;
16004
489
16005
489
      MachineMemOperand::Flags StMMOFlags =
16006
489
          IsNonTemporalStore ? 
MachineMemOperand::MONonTemporal2
16007
489
                             : 
MachineMemOperand::MONone487
;
16008
489
16009
489
      SDValue NewLoad, NewStore;
16010
489
      if (UseVectorTy || 
!DoIntegerTruncate165
) {
16011
479
        NewLoad =
16012
479
            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16013
479
                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16014
479
                        FirstLoadAlign, LdMMOFlags);
16015
479
        NewStore = DAG.getStore(
16016
479
            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16017
479
            FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16018
479
      } else { // This must be the truncstore/extload case
16019
10
        EVT ExtendedTy =
16020
10
            TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16021
10
        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16022
10
                                 FirstLoad->getChain(), FirstLoad->getBasePtr(),
16023
10
                                 FirstLoad->getPointerInfo(), JointMemOpVT,
16024
10
                                 FirstLoadAlign, LdMMOFlags);
16025
10
        NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16026
10
                                     FirstInChain->getBasePtr(),
16027
10
                                     FirstInChain->getPointerInfo(),
16028
10
                                     JointMemOpVT, FirstInChain->getAlignment(),
16029
10
                                     FirstInChain->getMemOperand()->getFlags());
16030
10
      }
16031
489
16032
489
      // Transfer chain users from old loads to the new load.
16033
1.74k
      for (unsigned i = 0; i < NumElem; 
++i1.25k
) {
16034
1.25k
        LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16035
1.25k
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16036
1.25k
                                      SDValue(NewLoad.getNode(), 1));
16037
1.25k
      }
16038
489
16039
489
      // Replace the all stores with the new store. Recursively remove
16040
489
      // corresponding value if its no longer used.
16041
1.74k
      for (unsigned i = 0; i < NumElem; 
++i1.25k
) {
16042
1.25k
        SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16043
1.25k
        CombineTo(StoreNodes[i].MemNode, NewStore);
16044
1.25k
        if (Val.getNode()->use_empty())
16045
1.25k
          recursivelyDeleteUnusedNodes(Val.getNode());
16046
1.25k
      }
16047
489
16048
489
      RV = true;
16049
489
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16050
489
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16051
489
      NumConsecutiveStores -= NumElem;
16052
489
    }
16053
35.8k
  }
16054
267k
  
return RV168k
;
16055
267k
}
16056
16057
102k
/// Re-chain the store \p ST onto \p BetterChain (a chain with fewer false
/// dependencies), while keeping the old chain reachable.  A clone of the
/// store is created on the new chain and the original store is combined
/// away into a TokenFactor of (old chain, new store).
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
  SDLoc SL(ST);
  SDValue ReplStore;

  // Replace the chain to avoid dependency.
  // Preserve the truncating-ness of the original store when cloning it.
  if (ST->isTruncatingStore()) {
    ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
                                  ST->getBasePtr(), ST->getMemoryVT(),
                                  ST->getMemOperand());
  } else {
    ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
                             ST->getMemOperand());
  }

  // Create token to keep both nodes around.  Users of ST's chain result will
  // now depend on both the original chain and the replacement store.
  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
                              MVT::Other, ST->getChain(), ReplStore);

  // Make sure the new and old chains are cleaned up.
  AddToWorklist(Token.getNode());

  // Don't add users to work list.
  return CombineTo(ST, Token, false);
}
16081
16082
3.71k
/// Turn a store of a floating-point constant into a store of the equivalent
/// integer bit pattern, e.g. 'store float 1.0' -> 'store i32 0x3F800000'.
/// For f64 where i64 stores are not available, the value may be split into
/// two i32 stores.  Returns SDValue() when no profitable/legal replacement
/// exists for this type/target combination.
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
  SDValue Value = ST->getValue();
  // Target constants must be left untouched for the backend.
  if (Value.getOpcode() == ISD::TargetConstantFP)
    return SDValue();

  SDLoc DL(ST);

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();

  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);

  // NOTE: If the original store is volatile, this transform must not increase
  // the number of stores.  For example, on x86-32 an f64 can be stored in one
  // processor operation but an i64 (which is not legal) requires two.  So the
  // transform should not be done in this case.

  SDValue Tmp;
  switch (CFP->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unknown FP type");
  case MVT::f16:    // We don't do this for these yet.
  case MVT::f80:
  case MVT::f128:
  case MVT::ppcf128:
    return SDValue();
  case MVT::f32:
    // Safe before legalization for non-volatile stores if i32 is legal, or
    // anytime the target supports an i32 store.
    if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      ;
      // Reinterpret the f32 payload as its 32-bit integer bit pattern.
      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                            bitcastToAPInt().getZExtValue(), SDLoc(CFP),
                            MVT::i32);
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
    }

    return SDValue();
  case MVT::f64:
    // Preferred form: a single i64 store of the bit pattern.
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
         !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      ;
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                            getZExtValue(), SDLoc(CFP), MVT::i64);
      return DAG.getStore(Chain, DL, Tmp,
                          Ptr, ST->getMemOperand());
    }

    // Fallback: split into two i32 stores (only for non-volatile stores,
    // since this doubles the number of memory operations).
    if (!ST->isVolatile() &&
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      // Many FP stores are not made apparent until after legalize, e.g. for
      // argument passing.  Since this is so common, custom legalize the
      // 64-bit integer store into two 32-bit stores.
      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
      // On big-endian targets the high half goes at the lower address.
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      unsigned Alignment = ST->getAlignment();
      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
      AAMDNodes AAInfo = ST->getAAInfo();

      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                                 ST->getAlignment(), MMOFlags, AAInfo);
      // Address of the second (offset +4) half.
      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                        DAG.getConstant(4, DL, Ptr.getValueType()));
      // The second store's alignment can be no better than offset 4 allows.
      Alignment = MinAlign(Alignment, 4U);
      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
                                 ST->getPointerInfo().getWithOffset(4),
                                 Alignment, MMOFlags, AAInfo);
      // Tie both halves together so chain users see both stores.
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                         St0, St1);
    }

    return SDValue();
  }
}
16160
16161
3.23M
/// Combine entry point for ISD::STORE nodes.  Applies a fixed pipeline of
/// transformations: bitcast look-through, dead-store elimination, alignment
/// refinement, FP<->int store pairing, byte-swap store matching, chain
/// improvement, truncstore demanded-bits simplification, overlapping-store
/// folding, truncate/fp_round folding, consecutive-store merging, indexed
/// store formation, FP-constant replacement, merged-value splitting, and
/// finally store-width reduction.  The order is significant: several later
/// steps are deliberately run only after merging (see comments below).
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
                                     DAG, *ST->getMemOperand())) {
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getPointerInfo(), ST->getAlignment(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Try transforming several stores into STORE (BSWAP).
  if (SDValue Store = MatchStoreCombine(ST))
    return Store;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // The chain may have been updated by findBetterNeighborChains; re-read it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Only the low MemoryVT-sized bits of the stored value are observable.
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
    AddToWorklist(Value.getNode());
    if (Shorter)
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // Transforms against the immediately preceding store on the chain.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If this is a store who's preceding store to a subset of the current
        // location and no one other node is chained to that store we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }

        // If ST stores to a subset of preceding store's write set, we may be
        // able to fold ST's value into the preceding stored value. As we know
        // the other uses of ST1's chain are unconcerned with ST, this folding
        // will not affect those nodes.
        int64_t BitOffset;
        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
                               BitOffset)) {
          SDValue ChainValue = ST1->getValue();
          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
              // Merge ST's constant into ST1's constant at BitOffset, so ST
              // itself can be removed.
              APInt Val = C1->getAPIntValue();
              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
              // FIXME: Handle Big-endian mode.
              if (!DAG.getDataLayout().isBigEndian()) {
                Val.insertBits(InsertVal, BitOffset);
                SDValue NewSDVal =
                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
                                    C1->isTargetOpcode(), C1->isOpaque());
                SDNode *NewST1 = DAG.UpdateNodeOperands(
                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
                    ST1->getOperand(3));
                return CombineTo(ST, SDValue(NewST1, 0));
              }
            }
          }
        } // End ST subset of ST1 case.
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Try splitting a bundled {hi,lo} value store into two narrower stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
16371
16372
84.7k
/// Combine entry point for LIFETIME_END nodes.  Walks up the chain from the
/// lifetime marker looking for stores that write entirely within the object
/// whose lifetime is ending; such stores are dead and are removed.
SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
  const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
  // Without a known frame offset we cannot reason about the covered range.
  if (!LifetimeEnd->hasOffset())
    return SDValue();

  const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
                                        LifetimeEnd->getOffset(), false);

  // We walk up the chains to find stores.
  // Worklist of chain values still to examine.
  SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();
    // Only look through single-use chains; a multi-use chain node may feed
    // other consumers we cannot account for here.
    if (!Chain.hasOneUse())
      continue;
    switch (Chain.getOpcode()) {
    case ISD::TokenFactor:
      // Examine every incoming chain of the token factor.
      for (unsigned Nops = Chain.getNumOperands(); Nops;)
        Chains.push_back(Chain.getOperand(--Nops));
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
      // We can forward past any lifetime start/end that can be proven not to
      // alias the node.
      if (!isAlias(Chain.getNode(), N))
        Chains.push_back(Chain.getOperand(0));
      break;
    case ISD::STORE: {
      StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
      // Volatile or indexed stores must be preserved.
      if (ST->isVolatile() || ST->isIndexed())
        continue;
      const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
      // If we store purely within object bounds just before its lifetime ends,
      // we can remove the store.
      if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
                                   ST->getMemoryVT().getStoreSizeInBits())) {
        LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
                   dbgs() << "\nwithin LIFETIME_END of : ";
                   LifetimeEndBase.dump(); dbgs() << "\n");
        // Replace the store with its input chain, deleting it.
        CombineTo(ST, ST->getChain());
        return SDValue(N, 0);
      }
    }
    }
  }
  return SDValue();
}
16419
16420
/// For the instruction sequence of store below, F and I values
16421
/// are bundled together as an i64 value before being stored into memory.
16422
/// Sometimes it is more efficent to generate separate stores for F and I,
16423
/// which can remove the bitwise instructions or sink them to colder places.
16424
///
16425
///   (store (or (zext (bitcast F to i32) to i64),
16426
///              (shl (zext I to i64), 32)), addr)  -->
16427
///   (store F, addr) and (store I, addr+4)
16428
///
16429
/// Similarly, splitting for other merged store can also be beneficial, like:
16430
/// For pair of {i32, i32}, i64 store --> two i32 stores.
16431
/// For pair of {i32, i16}, i64 store --> two i32 stores.
16432
/// For pair of {i16, i16}, i32 store --> two i16 stores.
16433
/// For pair of {i16, i8},  i32 store --> two i16 stores.
16434
/// For pair of {i8, i8},   i16 store --> two i8 stores.
16435
///
16436
/// We allow each target to determine specifically which kind of splitting is
16437
/// supported.
16438
///
16439
/// The store patterns are commonly seen from the simple code snippet below
16440
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16441
///   void goo(const std::pair<int, float> &);
16442
///   hoo() {
16443
///     ...
16444
///     goo(std::make_pair(tmp, ftmp));
16445
///     ...
16446
///   }
16447
///
16448
3.02M
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16449
3.02M
  if (OptLevel == CodeGenOpt::None)
16450
47.0k
    return SDValue();
16451
2.97M
16452
2.97M
  SDValue Val = ST->getValue();
16453
2.97M
  SDLoc DL(ST);
16454
2.97M
16455
2.97M
  // Match OR operand.
16456
2.97M
  if (!Val.getValueType().isScalarInteger() || 
Val.getOpcode() != ISD::OR1.85M
)
16457
2.95M
    return SDValue();
16458
20.7k
16459
20.7k
  // Match SHL operand and get Lower and Higher parts of Val.
16460
20.7k
  SDValue Op1 = Val.getOperand(0);
16461
20.7k
  SDValue Op2 = Val.getOperand(1);
16462
20.7k
  SDValue Lo, Hi;
16463
20.7k
  if (Op1.getOpcode() != ISD::SHL) {
16464
18.7k
    std::swap(Op1, Op2);
16465
18.7k
    if (Op1.getOpcode() != ISD::SHL)
16466
14.4k
      return SDValue();
16467
6.28k
  }
16468
6.28k
  Lo = Op2;
16469
6.28k
  Hi = Op1.getOperand(0);
16470
6.28k
  if (!Op1.hasOneUse())
16471
349
    return SDValue();
16472
5.93k
16473
5.93k
  // Match shift amount to HalfValBitSize.
16474
5.93k
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16475
5.93k
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16476
5.93k
  if (!ShAmt || 
ShAmt->getAPIntValue() != HalfValBitSize3.89k
)
16477
4.12k
    return SDValue();
16478
1.81k
16479
1.81k
  // Lo and Hi are zero-extended from int with size less equal than 32
16480
1.81k
  // to i64.
16481
1.81k
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || 
!Lo.hasOneUse()405
||
16482
1.81k
      
!Lo.getOperand(0).getValueType().isScalarInteger()405
||
16483
1.81k
      
Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize405
||
16484
1.81k
      
Hi.getOpcode() != ISD::ZERO_EXTEND405
||
!Hi.hasOneUse()111
||
16485
1.81k
      
!Hi.getOperand(0).getValueType().isScalarInteger()111
||
16486
1.81k
      
Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize111
)
16487
1.70k
    return SDValue();
16488
111
16489
111
  // Use the EVT of low and high parts before bitcast as the input
16490
111
  // of target query.
16491
111
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16492
111
                  ? 
Lo.getOperand(0).getValueType()0
16493
111
                  : Lo.getValueType();
16494
111
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16495
111
                   ? 
Hi.getOperand(0).getValueType()0
16496
111
                   : Hi.getValueType();
16497
111
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16498
111
    return SDValue();
16499
0
16500
0
  // Start to split store.
16501
0
  unsigned Alignment = ST->getAlignment();
16502
0
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16503
0
  AAMDNodes AAInfo = ST->getAAInfo();
16504
0
16505
0
  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16506
0
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16507
0
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16508
0
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16509
0
16510
0
  SDValue Chain = ST->getChain();
16511
0
  SDValue Ptr = ST->getBasePtr();
16512
0
  // Lower value store.
16513
0
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16514
0
                             ST->getAlignment(), MMOFlags, AAInfo);
16515
0
  Ptr =
16516
0
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16517
0
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16518
0
  // Higher value store.
16519
0
  SDValue St1 =
16520
0
      DAG.getStore(St0, DL, Hi, Ptr,
16521
0
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16522
0
                   Alignment / 2, MMOFlags, AAInfo);
16523
0
  return St1;
16524
0
}
16525
16526
/// Convert a disguised subvector insertion into a shuffle:
16527
/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
16528
/// bitcast(shuffle (bitcast V), (extended X), Mask)
16529
/// Note: We do not use an insert_subvector node because that requires a legal
16530
/// subvector type.
16531
79.1k
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16532
79.1k
  SDValue InsertVal = N->getOperand(1);
16533
79.1k
  if (InsertVal.getOpcode() != ISD::BITCAST || 
!InsertVal.hasOneUse()1.59k
||
16534
79.1k
      
!InsertVal.getOperand(0).getValueType().isVector()1.53k
)
16535
78.0k
    return SDValue();
16536
1.09k
16537
1.09k
  SDValue SubVec = InsertVal.getOperand(0);
16538
1.09k
  SDValue DestVec = N->getOperand(0);
16539
1.09k
  EVT SubVecVT = SubVec.getValueType();
16540
1.09k
  EVT VT = DestVec.getValueType();
16541
1.09k
  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16542
1.09k
  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16543
1.09k
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16544
1.09k
16545
1.09k
  // Step 1: Create a shuffle mask that implements this insert operation. The
16546
1.09k
  // vector that we are inserting into will be operand 0 of the shuffle, so
16547
1.09k
  // those elements are just 'i'. The inserted subvector is in the first
16548
1.09k
  // positions of operand 1 of the shuffle. Example:
16549
1.09k
  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16550
1.09k
  SmallVector<int, 16> Mask(NumMaskVals);
16551
107k
  for (unsigned i = 0; i != NumMaskVals; 
++i105k
) {
16552
105k
    if (i / NumSrcElts == InsIndex)
16553
13.6k
      Mask[i] = (i % NumSrcElts) + NumMaskVals;
16554
92.2k
    else
16555
92.2k
      Mask[i] = i;
16556
105k
  }
16557
1.09k
16558
1.09k
  // Bail out if the target can not handle the shuffle we want to create.
16559
1.09k
  EVT SubVecEltVT = SubVecVT.getVectorElementType();
16560
1.09k
  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16561
1.09k
  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16562
1.04k
    return SDValue();
16563
42
16564
42
  // Step 2: Create a wide vector from the inserted source vector by appending
16565
42
  // undefined elements. This is the same size as our destination vector.
16566
42
  SDLoc DL(N);
16567
42
  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16568
42
  ConcatOps[0] = SubVec;
16569
42
  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16570
42
16571
42
  // Step 3: Shuffle in the padded subvector.
16572
42
  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16573
42
  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16574
42
  AddToWorklist(PaddedSubV.getNode());
16575
42
  AddToWorklist(DestVecBC.getNode());
16576
42
  AddToWorklist(Shuf.getNode());
16577
42
  return DAG.getBitcast(VT, Shuf);
16578
42
}
16579
16580
80.2k
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16581
80.2k
  SDValue InVec = N->getOperand(0);
16582
80.2k
  SDValue InVal = N->getOperand(1);
16583
80.2k
  SDValue EltNo = N->getOperand(2);
16584
80.2k
  SDLoc DL(N);
16585
80.2k
16586
80.2k
  // If the inserted element is an UNDEF, just use the input vector.
16587
80.2k
  if (InVal.isUndef())
16588
235
    return InVec;
16589
80.0k
16590
80.0k
  EVT VT = InVec.getValueType();
16591
80.0k
  unsigned NumElts = VT.getVectorNumElements();
16592
80.0k
16593
80.0k
  // Remove redundant insertions:
16594
80.0k
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16595
80.0k
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16596
80.0k
      
InVec == InVal.getOperand(0)21.6k
&&
EltNo == InVal.getOperand(1)33
)
16597
11
    return InVec;
16598
79.9k
16599
79.9k
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16600
79.9k
  if (!IndexC) {
16601
861
    // If this is variable insert to undef vector, it might be better to splat:
16602
861
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16603
861
    if (InVec.isUndef() && 
TLI.shouldSplatInsEltVarIndex(VT)149
) {
16604
60
      SmallVector<SDValue, 8> Ops(NumElts, InVal);
16605
60
      return DAG.getBuildVector(VT, DL, Ops);
16606
60
    }
16607
801
    return SDValue();
16608
801
  }
16609
79.1k
16610
79.1k
  // We must know which element is being inserted for folds below here.
16611
79.1k
  unsigned Elt = IndexC->getZExtValue();
16612
79.1k
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16613
42
    return Shuf;
16614
79.0k
16615
79.0k
  // Canonicalize insert_vector_elt dag nodes.
16616
79.0k
  // Example:
16617
79.0k
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16618
79.0k
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16619
79.0k
  //
16620
79.0k
  // Do this only if the child insert_vector node has one use; also
16621
79.0k
  // do this only if indices are both constants and Idx1 < Idx0.
16622
79.0k
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && 
InVec.hasOneUse()22.1k
16623
79.0k
      && 
isa<ConstantSDNode>(InVec.getOperand(2))22.1k
) {
16624
22.1k
    unsigned OtherElt = InVec.getConstantOperandVal(2);
16625
22.1k
    if (Elt < OtherElt) {
16626
305
      // Swap nodes.
16627
305
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16628
305
                                  InVec.getOperand(0), InVal, EltNo);
16629
305
      AddToWorklist(NewOp.getNode());
16630
305
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16631
305
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16632
305
    }
16633
78.7k
  }
16634
78.7k
16635
78.7k
  // If we can't generate a legal BUILD_VECTOR, exit
16636
78.7k
  if (LegalOperations && 
!TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)22.3k
)
16637
20.4k
    return SDValue();
16638
58.3k
16639
58.3k
  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16640
58.3k
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16641
58.3k
  // vector elements.
16642
58.3k
  SmallVector<SDValue, 8> Ops;
16643
58.3k
  // Do not combine these two vectors if the output vector will not replace
16644
58.3k
  // the input vector.
16645
58.3k
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && 
InVec.hasOneUse()19.5k
) {
16646
19.1k
    Ops.append(InVec.getNode()->op_begin(),
16647
19.1k
               InVec.getNode()->op_end());
16648
39.2k
  } else if (InVec.isUndef()) {
16649
13.5k
    Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16650
25.6k
  } else {
16651
25.6k
    return SDValue();
16652
25.6k
  }
16653
32.7k
  assert(Ops.size() == NumElts && "Unexpected vector size");
16654
32.7k
16655
32.7k
  // Insert the element
16656
32.7k
  if (Elt < Ops.size()) {
16657
32.7k
    // All the operands of BUILD_VECTOR must have the same type;
16658
32.7k
    // we enforce that here.
16659
32.7k
    EVT OpVT = Ops[0].getValueType();
16660
32.7k
    Ops[Elt] = OpVT.isInteger() ? 
DAG.getAnyExtOrTrunc(InVal, DL, OpVT)19.7k
:
InVal12.9k
;
16661
32.7k
  }
16662
32.7k
16663
32.7k
  // Return the new vector
16664
32.7k
  return DAG.getBuildVector(VT, DL, Ops);
16665
32.7k
}
16666
16667
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16668
                                                  SDValue EltNo,
16669
789
                                                  LoadSDNode *OriginalLoad) {
16670
789
  assert(!OriginalLoad->isVolatile());
16671
789
16672
789
  EVT ResultVT = EVE->getValueType(0);
16673
789
  EVT VecEltVT = InVecVT.getVectorElementType();
16674
789
  unsigned Align = OriginalLoad->getAlignment();
16675
789
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16676
789
      VecEltVT.getTypeForEVT(*DAG.getContext()));
16677
789
16678
789
  if (NewAlign > Align || 
!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)781
)
16679
47
    return SDValue();
16680
742
16681
742
  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16682
742
    
ISD::NON_EXTLOAD0
: ISD::EXTLOAD;
16683
742
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16684
14
    return SDValue();
16685
728
16686
728
  Align = NewAlign;
16687
728
16688
728
  SDValue NewPtr = OriginalLoad->getBasePtr();
16689
728
  SDValue Offset;
16690
728
  EVT PtrType = NewPtr.getValueType();
16691
728
  MachinePointerInfo MPI;
16692
728
  SDLoc DL(EVE);
16693
728
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16694
719
    int Elt = ConstEltNo->getZExtValue();
16695
719
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16696
719
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
16697
719
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16698
719
  } else {
16699
9
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16700
9
    Offset = DAG.getNode(
16701
9
        ISD::MUL, DL, PtrType, Offset,
16702
9
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16703
9
    // Discard the pointer info except the address space because the memory
16704
9
    // operand can't represent this new access since the offset is variable.
16705
9
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16706
9
  }
16707
728
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16708
728
16709
728
  // The replacement we need to do here is a little tricky: we need to
16710
728
  // replace an extractelement of a load with a load.
16711
728
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
16712
728
  // Note that this replacement assumes that the extractvalue is the only
16713
728
  // use of the load; that's okay because we don't want to perform this
16714
728
  // transformation in other cases anyway.
16715
728
  SDValue Load;
16716
728
  SDValue Chain;
16717
728
  if (ResultVT.bitsGT(VecEltVT)) {
16718
0
    // If the result type of vextract is wider than the load, then issue an
16719
0
    // extending load instead.
16720
0
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16721
0
                                                  VecEltVT)
16722
0
                                   ? ISD::ZEXTLOAD
16723
0
                                   : ISD::EXTLOAD;
16724
0
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16725
0
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16726
0
                          Align, OriginalLoad->getMemOperand()->getFlags(),
16727
0
                          OriginalLoad->getAAInfo());
16728
0
    Chain = Load.getValue(1);
16729
728
  } else {
16730
728
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16731
728
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16732
728
                       OriginalLoad->getAAInfo());
16733
728
    Chain = Load.getValue(1);
16734
728
    if (ResultVT.bitsLT(VecEltVT))
16735
0
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16736
728
    else
16737
728
      Load = DAG.getBitcast(ResultVT, Load);
16738
728
  }
16739
728
  WorklistRemover DeadNodes(*this);
16740
728
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16741
728
  SDValue To[] = { Load, Chain };
16742
728
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16743
728
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
16744
728
  // worklist explicitly as well.
16745
728
  AddToWorklist(Load.getNode());
16746
728
  AddUsersToWorklist(Load.getNode()); // Add users too
16747
728
  // Make sure to revisit this node to clean it up; it will usually be dead.
16748
728
  AddToWorklist(EVE);
16749
728
  ++OpsNarrowed;
16750
728
  return SDValue(EVE, 0);
16751
728
}
16752
16753
/// Transform a vector binary operation into a scalar binary operation by moving
16754
/// the math/logic after an extract element of a vector.
16755
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16756
556k
                                       bool LegalOperations) {
16757
556k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16758
556k
  SDValue Vec = ExtElt->getOperand(0);
16759
556k
  SDValue Index = ExtElt->getOperand(1);
16760
556k
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16761
556k
  if (!IndexC || 
!TLI.isBinOp(Vec.getOpcode())551k
||
!Vec.hasOneUse()22.3k
||
16762
556k
      
Vec.getNode()->getNumValues() != 111.0k
)
16763
545k
    return SDValue();
16764
11.0k
16765
11.0k
  // Targets may want to avoid this to prevent an expensive register transfer.
16766
11.0k
  if (!TLI.shouldScalarizeBinop(Vec))
16767
4.15k
    return SDValue();
16768
6.85k
16769
6.85k
  // Extracting an element of a vector constant is constant-folded, so this
16770
6.85k
  // transform is just replacing a vector op with a scalar op while moving the
16771
6.85k
  // extract.
16772
6.85k
  SDValue Op0 = Vec.getOperand(0);
16773
6.85k
  SDValue Op1 = Vec.getOperand(1);
16774
6.85k
  if (isAnyConstantBuildVector(Op0, true) ||
16775
6.85k
      
isAnyConstantBuildVector(Op1, true)6.47k
) {
16776
1.00k
    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16777
1.00k
    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16778
1.00k
    SDLoc DL(ExtElt);
16779
1.00k
    EVT VT = ExtElt->getValueType(0);
16780
1.00k
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16781
1.00k
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16782
1.00k
    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16783
1.00k
  }
16784
5.84k
16785
5.84k
  return SDValue();
16786
5.84k
}
16787
16788
613k
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16789
613k
  SDValue VecOp = N->getOperand(0);
16790
613k
  SDValue Index = N->getOperand(1);
16791
613k
  EVT ScalarVT = N->getValueType(0);
16792
613k
  EVT VecVT = VecOp.getValueType();
16793
613k
  if (VecOp.isUndef())
16794
55
    return DAG.getUNDEF(ScalarVT);
16795
613k
16796
613k
  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16797
613k
  //
16798
613k
  // This only really matters if the index is non-constant since other combines
16799
613k
  // on the constant elements already work.
16800
613k
  SDLoc DL(N);
16801
613k
  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16802
613k
      
Index == VecOp.getOperand(2)1.52k
) {
16803
36
    SDValue Elt = VecOp.getOperand(1);
16804
36
    return VecVT.isInteger() ? 
DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT)25
:
Elt11
;
16805
36
  }
16806
613k
16807
613k
  // (vextract (scalar_to_vector val, 0) -> val
16808
613k
  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16809
435
    // Check if the result type doesn't match the inserted element type. A
16810
435
    // SCALAR_TO_VECTOR may truncate the inserted element and the
16811
435
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
16812
435
    SDValue InOp = VecOp.getOperand(0);
16813
435
    if (InOp.getValueType() != ScalarVT) {
16814
5
      assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16815
5
      return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16816
5
    }
16817
430
    return InOp;
16818
430
  }
16819
612k
16820
612k
  // extract_vector_elt of out-of-bounds element -> UNDEF
16821
612k
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16822
612k
  unsigned NumElts = VecVT.getVectorNumElements();
16823
612k
  if (IndexC && 
IndexC->getAPIntValue().uge(NumElts)608k
)
16824
4
    return DAG.getUNDEF(ScalarVT);
16825
612k
16826
612k
  // extract_vector_elt (build_vector x, y), 1 -> y
16827
612k
  if (IndexC && 
VecOp.getOpcode() == ISD::BUILD_VECTOR608k
&&
16828
612k
      
TLI.isTypeLegal(VecVT)56.0k
&&
16829
612k
      
(55.7k
VecOp.hasOneUse()55.7k
||
TLI.aggressivelyPreferBuildVectorSources(VecVT)38.7k
)) {
16830
54.8k
    SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16831
54.8k
    EVT InEltVT = Elt.getValueType();
16832
54.8k
16833
54.8k
    // Sometimes build_vector's scalar input types do not match result type.
16834
54.8k
    if (ScalarVT == InEltVT)
16835
54.8k
      return Elt;
16836
557k
16837
557k
    // TODO: It may be useful to truncate if free if the build_vector implicitly
16838
557k
    // converts.
16839
557k
  }
16840
557k
16841
557k
  // TODO: These transforms should not require the 'hasOneUse' restriction, but
16842
557k
  // there are regressions on multiple targets without it. We can end up with a
16843
557k
  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16844
557k
  if (IndexC && 
VecOp.getOpcode() == ISD::BITCAST553k
&&
VecVT.isInteger()137k
&&
16845
557k
      
VecOp.hasOneUse()121k
) {
16846
31.7k
    // The vector index of the LSBs of the source depend on the endian-ness.
16847
31.7k
    bool IsLE = DAG.getDataLayout().isLittleEndian();
16848
31.7k
    unsigned ExtractIndex = IndexC->getZExtValue();
16849
31.7k
    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16850
31.7k
    unsigned BCTruncElt = IsLE ? 
031.4k
:
NumElts - 1342
;
16851
31.7k
    SDValue BCSrc = VecOp.getOperand(0);
16852
31.7k
    if (ExtractIndex == BCTruncElt && 
BCSrc.getValueType().isScalarInteger()12.6k
)
16853
1.37k
      return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16854
30.3k
16855
30.3k
    if (LegalTypes && 
BCSrc.getValueType().isInteger()28.2k
&&
16856
30.3k
        
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR26.7k
) {
16857
43
      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16858
43
      // trunc i64 X to i32
16859
43
      SDValue X = BCSrc.getOperand(0);
16860
43
      assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16861
43
             "Extract element and scalar to vector can't change element type "
16862
43
             "from FP to integer.");
16863
43
      unsigned XBitWidth = X.getValueSizeInBits();
16864
43
      unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16865
43
      BCTruncElt = IsLE ? 
015
:
XBitWidth / VecEltBitWidth - 128
;
16866
43
16867
43
      // An extract element return value type can be wider than its vector
16868
43
      // operand element type. In that case, the high bits are undefined, so
16869
43
      // it's possible that we may need to extend rather than truncate.
16870
43
      if (ExtractIndex == BCTruncElt && 
XBitWidth > VecEltBitWidth36
) {
16871
31
        assert(XBitWidth % VecEltBitWidth == 0 &&
16872
31
               "Scalar bitwidth must be a multiple of vector element bitwidth");
16873
31
        return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16874
31
      }
16875
556k
    }
16876
30.3k
  }
16877
556k
16878
556k
  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16879
1.00k
    return BO;
16880
555k
16881
555k
  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16882
555k
  // We only perform this optimization before the op legalization phase because
16883
555k
  // we may introduce new vector instructions which are not backed by TD
16884
555k
  // patterns. For example on AVX, extracting elements from a wide vector
16885
555k
  // without using extract_subvector. However, if we can find an underlying
16886
555k
  // scalar value, then we can always use that.
16887
555k
  if (IndexC && 
VecOp.getOpcode() == ISD::VECTOR_SHUFFLE550k
) {
16888
2.51k
    auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16889
2.51k
    // Find the new index to extract from.
16890
2.51k
    int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16891
2.51k
16892
2.51k
    // Extracting an undef index is undef.
16893
2.51k
    if (OrigElt == -1)
16894
40
      return DAG.getUNDEF(ScalarVT);
16895
2.47k
16896
2.47k
    // Select the right vector half to extract from.
16897
2.47k
    SDValue SVInVec;
16898
2.47k
    if (OrigElt < (int)NumElts) {
16899
1.82k
      SVInVec = VecOp.getOperand(0);
16900
1.82k
    } else {
16901
645
      SVInVec = VecOp.getOperand(1);
16902
645
      OrigElt -= NumElts;
16903
645
    }
16904
2.47k
16905
2.47k
    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16906
193
      SDValue InOp = SVInVec.getOperand(OrigElt);
16907
193
      if (InOp.getValueType() != ScalarVT) {
16908
0
        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16909
0
        InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16910
0
      }
16911
193
16912
193
      return InOp;
16913
193
    }
16914
2.27k
16915
2.27k
    // FIXME: We should handle recursing on other vector shuffles and
16916
2.27k
    // scalar_to_vector here as well.
16917
2.27k
16918
2.27k
    if (!LegalOperations ||
16919
2.27k
        // FIXME: Should really be just isOperationLegalOrCustom.
16920
2.27k
        
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT)161
||
16921
2.27k
        
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)150
) {
16922
2.15k
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16923
2.15k
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16924
2.15k
                         DAG.getConstant(OrigElt, DL, IndexTy));
16925
2.15k
    }
16926
553k
  }
16927
553k
16928
553k
  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16929
553k
  // simplify it based on the (valid) extraction indices.
16930
2.53M
  
if (553k
llvm::all_of(VecOp->uses(), [&](SDNode *Use) 553k
{
16931
2.53M
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16932
2.53M
               
Use->getOperand(0) == VecOp2.43M
&&
16933
2.53M
               
isa<ConstantSDNode>(Use->getOperand(1))2.43M
;
16934
2.53M
      })) {
16935
444k
    APInt DemandedElts = APInt::getNullValue(NumElts);
16936
2.23M
    for (SDNode *Use : VecOp->uses()) {
16937
2.23M
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16938
2.23M
      if (CstElt->getAPIntValue().ult(NumElts))
16939
2.23M
        DemandedElts.setBit(CstElt->getZExtValue());
16940
2.23M
    }
16941
444k
    if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16942
2.07k
      // We simplified the vector operand of this extract element. If this
16943
2.07k
      // extract is not dead, visit it again so it is folded properly.
16944
2.07k
      if (N->getOpcode() != ISD::DELETED_NODE)
16945
2.06k
        AddToWorklist(N);
16946
2.07k
      return SDValue(N, 0);
16947
2.07k
    }
16948
550k
  }
16949
550k
16950
550k
  // Everything under here is trying to match an extract of a loaded value.
16951
550k
  // If the result of load has to be truncated, then it's not necessarily
16952
550k
  // profitable.
16953
550k
  bool BCNumEltsChanged = false;
16954
550k
  EVT ExtVT = VecVT.getVectorElementType();
16955
550k
  EVT LVT = ExtVT;
16956
550k
  if (ScalarVT.bitsLT(LVT) && 
!TLI.isTruncateFree(LVT, ScalarVT)0
)
16957
0
    return SDValue();
16958
550k
16959
550k
  if (VecOp.getOpcode() == ISD::BITCAST) {
16960
135k
    // Don't duplicate a load with other uses.
16961
135k
    if (!VecOp.hasOneUse())
16962
101k
      return SDValue();
16963
33.6k
16964
33.6k
    EVT BCVT = VecOp.getOperand(0).getValueType();
16965
33.6k
    if (!BCVT.isVector() || 
ExtVT.bitsGT(BCVT.getVectorElementType())24.7k
)
16966
18.3k
      return SDValue();
16967
15.2k
    if (NumElts != BCVT.getVectorNumElements())
16968
13.2k
      BCNumEltsChanged = true;
16969
15.2k
    VecOp = VecOp.getOperand(0);
16970
15.2k
    ExtVT = BCVT.getVectorElementType();
16971
15.2k
  }
16972
550k
16973
550k
  // extract (vector load $addr), i --> load $addr + i * size
16974
550k
  
if (430k
!LegalOperations430k
&&
!IndexC191k
&&
VecOp.hasOneUse()4.49k
&&
16975
430k
      
ISD::isNormalLoad(VecOp.getNode())893
&&
16976
430k
      
!Index->hasPredecessor(VecOp.getNode())93
) {
16977
65
    auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16978
65
    if (VecLoad && !VecLoad->isVolatile())
16979
52
      return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16980
430k
  }
16981
430k
16982
430k
  // Perform only after legalization to ensure build_vector / vector_shuffle
16983
430k
  // optimizations have already been done.
16984
430k
  if (!LegalOperations || 
!IndexC239k
)
16985
191k
    return SDValue();
16986
239k
16987
239k
  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16988
239k
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16989
239k
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16990
239k
  int Elt = IndexC->getZExtValue();
16991
239k
  LoadSDNode *LN0 = nullptr;
16992
239k
  if (ISD::isNormalLoad(VecOp.getNode())) {
16993
131k
    LN0 = cast<LoadSDNode>(VecOp);
16994
131k
  } else 
if (107k
VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR107k
&&
16995
107k
             
VecOp.getOperand(0).getValueType() == ExtVT48
&&
16996
107k
             
ISD::isNormalLoad(VecOp.getOperand(0).getNode())48
) {
16997
27
    // Don't duplicate a load with other uses.
16998
27
    if (!VecOp.hasOneUse())
16999
23
      return SDValue();
17000
4
17001
4
    LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17002
4
  }
17003
239k
  
if (auto *239k
Shuf239k
= dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17004
482
    // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17005
482
    // =>
17006
482
    // (load $addr+1*size)
17007
482
17008
482
    // Don't duplicate a load with other uses.
17009
482
    if (!VecOp.hasOneUse())
17010
51
      return SDValue();
17011
431
17012
431
    // If the bit convert changed the number of elements, it is unsafe
17013
431
    // to examine the mask.
17014
431
    if (BCNumEltsChanged)
17015
363
      return SDValue();
17016
68
17017
68
    // Select the input vector, guarding against out of range extract vector.
17018
68
    int Idx = (Elt > (int)NumElts) ? 
-10
: Shuf->getMaskElt(Elt);
17019
68
    VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : 
VecOp.getOperand(1)0
;
17020
68
17021
68
    if (VecOp.getOpcode() == ISD::BITCAST) {
17022
43
      // Don't duplicate a load with other uses.
17023
43
      if (!VecOp.hasOneUse())
17024
29
        return SDValue();
17025
14
17026
14
      VecOp = VecOp.getOperand(0);
17027
14
    }
17028
68
    
if (39
ISD::isNormalLoad(VecOp.getNode())39
) {
17029
17
      LN0 = cast<LoadSDNode>(VecOp);
17030
17
      Elt = (Idx < (int)NumElts) ? Idx : 
Idx - (int)NumElts0
;
17031
17
      Index = DAG.getConstant(Elt, DL, Index.getValueType());
17032
17
    }
17033
39
  }
17034
239k
17035
239k
  // Make sure we found a non-volatile load and the extractelement is
17036
239k
  // the only use.
17037
239k
  
if (238k
!LN0238k
||
!LN0->hasNUsesOfValue(1,0)131k
||
LN0->isVolatile()918
)
17038
237k
    return SDValue();
17039
737
17040
737
  // If Idx was -1 above, Elt is going to be -1, so just return undef.
17041
737
  if (Elt == -1)
17042
0
    return DAG.getUNDEF(LVT);
17043
737
17044
737
  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17045
737
}
17046
17047
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
17048
518k
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17049
518k
  // We perform this optimization post type-legalization because
17050
518k
  // the type-legalizer often scalarizes integer-promoted vectors.
17051
518k
  // Performing this optimization before may create bit-casts which
17052
518k
  // will be type-legalized to complex code sequences.
17053
518k
  // We perform this optimization only before the operation legalizer because we
17054
518k
  // may introduce illegal operations.
17055
518k
  if (Level != AfterLegalizeVectorOps && 
Level != AfterLegalizeTypes425k
)
17056
293k
    return SDValue();
17057
225k
17058
225k
  unsigned NumInScalars = N->getNumOperands();
17059
225k
  SDLoc DL(N);
17060
225k
  EVT VT = N->getValueType(0);
17061
225k
17062
225k
  // Check to see if this is a BUILD_VECTOR of a bunch of values
17063
225k
  // which come from any_extend or zero_extend nodes. If so, we can create
17064
225k
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17065
225k
  // optimizations. We do not handle sign-extend because we can't fill the sign
17066
225k
  // using shuffles.
17067
225k
  EVT SourceType = MVT::Other;
17068
225k
  bool AllAnyExt = true;
17069
225k
17070
241k
  for (unsigned i = 0; i != NumInScalars; 
++i15.2k
) {
17071
237k
    SDValue In = N->getOperand(i);
17072
237k
    // Ignore undef inputs.
17073
237k
    if (In.isUndef()) 
continue3.41k
;
17074
234k
17075
234k
    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
17076
234k
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17077
234k
17078
234k
    // Abort if the element is not an extension.
17079
234k
    if (!ZeroExt && 
!AnyExt228k
) {
17080
222k
      SourceType = MVT::Other;
17081
222k
      break;
17082
222k
    }
17083
11.8k
17084
11.8k
    // The input is a ZeroExt or AnyExt. Check the original type.
17085
11.8k
    EVT InTy = In.getOperand(0).getValueType();
17086
11.8k
17087
11.8k
    // Check that all of the widened source types are the same.
17088
11.8k
    if (SourceType == MVT::Other)
17089
4.24k
      // First time.
17090
4.24k
      SourceType = InTy;
17091
7.62k
    else if (InTy != SourceType) {
17092
2
      // Multiple income types. Abort.
17093
2
      SourceType = MVT::Other;
17094
2
      break;
17095
2
    }
17096
11.8k
17097
11.8k
    // Check if all of the extends are ANY_EXTENDs.
17098
11.8k
    AllAnyExt &= AnyExt;
17099
11.8k
  }
17100
225k
17101
225k
  // In order to have valid types, all of the inputs must be extended from the
17102
225k
  // same source type and all of the inputs must be any or zero extend.
17103
225k
  // Scalar sizes must be a power of two.
17104
225k
  EVT OutScalarTy = VT.getScalarType();
17105
225k
  bool ValidTypes = SourceType != MVT::Other &&
17106
225k
                 
isPowerOf2_32(OutScalarTy.getSizeInBits())3.61k
&&
17107
225k
                 
isPowerOf2_32(SourceType.getSizeInBits())3.61k
;
17108
225k
17109
225k
  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17110
225k
  // turn into a single shuffle instruction.
17111
225k
  if (!ValidTypes)
17112
222k
    return SDValue();
17113
3.61k
17114
3.61k
  bool isLE = DAG.getDataLayout().isLittleEndian();
17115
3.61k
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17116
3.61k
  assert(ElemRatio > 1 && "Invalid element size ratio");
17117
3.61k
  SDValue Filler = AllAnyExt ? 
DAG.getUNDEF(SourceType)1.77k
:
17118
3.61k
                               
DAG.getConstant(0, DL, SourceType)1.84k
;
17119
3.61k
17120
3.61k
  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17121
3.61k
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17122
3.61k
17123
3.61k
  // Populate the new build_vector
17124
16.2k
  for (unsigned i = 0, e = N->getNumOperands(); i != e; 
++i12.6k
) {
17125
12.6k
    SDValue Cast = N->getOperand(i);
17126
12.6k
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17127
12.6k
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
17128
12.6k
            Cast.isUndef()) && "Invalid cast opcode");
17129
12.6k
    SDValue In;
17130
12.6k
    if (Cast.isUndef())
17131
1.55k
      In = DAG.getUNDEF(SourceType);
17132
11.0k
    else
17133
11.0k
      In = Cast->getOperand(0);
17134
12.6k
    unsigned Index = isLE ? 
(i * ElemRatio)12.5k
:
17135
12.6k
                            
(i * ElemRatio + (ElemRatio - 1))24
;
17136
12.6k
17137
12.6k
    assert(Index < Ops.size() && "Invalid index");
17138
12.6k
    Ops[Index] = In;
17139
12.6k
  }
17140
3.61k
17141
3.61k
  // The type of the new BUILD_VECTOR node.
17142
3.61k
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17143
3.61k
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17144
3.61k
         "Invalid vector size");
17145
3.61k
  // Check if the new vector type is legal.
17146
3.61k
  if (!isTypeLegal(VecVT) ||
17147
3.61k
      
(2.30k
!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT)2.30k
&&
17148
2.30k
       
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)1.77k
))
17149
1.32k
    return SDValue();
17150
2.28k
17151
2.28k
  // Make the new BUILD_VECTOR.
17152
2.28k
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17153
2.28k
17154
2.28k
  // The new BUILD_VECTOR node has the potential to be further optimized.
17155
2.28k
  AddToWorklist(BV.getNode());
17156
2.28k
  // Bitcast to the desired type.
17157
2.28k
  return DAG.getBitcast(VT, BV);
17158
2.28k
}
17159
17160
// Build one VECTOR_SHUFFLE that realizes the lanes of BUILD_VECTOR \p N that
// come from input vectors number LeftIdx and LeftIdx+1 (per \p VectorMask).
//
// \param VectorMask  Per output lane: -1 = undef, 0 = zero vector, k > 0 =
//                    the k'th distinct input vector.
// \param VecIn1/VecIn2  The pair of source vectors for this shuffle; VecIn2
//                    may be a null SDValue when the pair has only one member.
// \param DidSplitVec True when the caller split one wide source in two, in
//                    which case extract indices are already relative to the
//                    half-width vectors and need no Vec2Offset bias.
// \returns the shuffle (possibly wrapped in EXTRACT_SUBVECTOR), or a null
//          SDValue if the input/output types can't be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // Widen the half-size second input with undef to match VT.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
17275
17276
516k
// Fold a BUILD_VECTOR of undefs plus exactly one (zext (extractelt V, C))
// element into a shuffle of V with a zero vector:
//   buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
//   bitcast (shuffle V, ZeroVec, Mask)
// Returns a null SDValue when the pattern doesn't match or the resulting
// shuffle mask is not legal for the target.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");

  // First, determine where the build vector is not undef.
  // TODO: We could extend this to handle zero elements as well as undefs.
  int NumBVOps = BV->getNumOperands();
  int ZextElt = -1;
  for (int i = 0; i != NumBVOps; ++i) {
    SDValue Op = BV->getOperand(i);
    if (Op.isUndef())
      continue;
    if (ZextElt == -1)
      ZextElt = i;
    else
      // More than one non-undef element - the pattern doesn't apply.
      return SDValue();
  }
  // Bail out if there's no non-undef element.
  if (ZextElt == -1)
    return SDValue();

  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
  // Also, require that the build vector does not implicitly truncate/extend
  // its elements.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
  EVT VT = BV->getValueType(0);
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // The zero-extend must be a multiple of the source size, and we must be
  // building a vector of the same size as the source of the extract element.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
  if (DestSize % SrcSize != 0 ||
      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // Create a shuffle mask that will combine the extracted element with zeros
  // and undefs.
  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio == ZextElt) {
      // The low bits of the (potentially translated) extracted element map to
      // the source vector. The high bits map to zero. We will use a zero vector
      // as the 2nd source operand of the shuffle, so use the 1st element of
      // that vector (mask value is number-of-elements) for the high bits.
      if (i % ZextRatio == 0)
        ShufMask[i] = Extract.getConstantOperandVal(1);
      else
        ShufMask[i] = NumMaskElts;
    }

    // Undef elements of the build vector remain undef because we initialize
    // the shuffle mask with -1.
  }

  // Turn this into a shuffle with zero if that's legal.
  EVT VecVT = Extract.getOperand(0).getValueType();
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
    return SDValue();

  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
  // bitcast (shuffle V, ZeroVec, VectorMask)
  SDLoc DL(BV);
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                      ShufMask);
  return DAG.getBitcast(VT, Shuf);
}
17353
17354
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17355
// operations. If the types of the vectors we're extracting from allow it,
17356
// turn this into a vector_shuffle node.
17357
516k
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17358
516k
  SDLoc DL(N);
17359
516k
  EVT VT = N->getValueType(0);
17360
516k
17361
516k
  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17362
516k
  if (!isTypeLegal(VT))
17363
0
    return SDValue();
17364
516k
17365
516k
  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17366
56
    return V;
17367
516k
17368
516k
  // May only combine to shuffle after legalize if shuffle is legal.
17369
516k
  if (LegalOperations && 
!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)238k
)
17370
237k
    return SDValue();
17371
278k
17372
278k
  bool UsesZeroVector = false;
17373
278k
  unsigned NumElems = N->getNumOperands();
17374
278k
17375
278k
  // Record, for each element of the newly built vector, which input vector
17376
278k
  // that element comes from. -1 stands for undef, 0 for the zero vector,
17377
278k
  // and positive values for the input vectors.
17378
278k
  // VectorMask maps each element to its vector number, and VecIn maps vector
17379
278k
  // numbers to their initial SDValues.
17380
278k
17381
278k
  SmallVector<int, 8> VectorMask(NumElems, -1);
17382
278k
  SmallVector<SDValue, 8> VecIn;
17383
278k
  VecIn.push_back(SDValue());
17384
278k
17385
859k
  for (unsigned i = 0; i != NumElems; 
++i581k
) {
17386
805k
    SDValue Op = N->getOperand(i);
17387
805k
17388
805k
    if (Op.isUndef())
17389
61.7k
      continue;
17390
743k
17391
743k
    // See if we can use a blend with a zero vector.
17392
743k
    // TODO: Should we generalize this to a blend with an arbitrary constant
17393
743k
    // vector?
17394
743k
    if (isNullConstant(Op) || 
isNullFPConstant(Op)334k
) {
17395
437k
      UsesZeroVector = true;
17396
437k
      VectorMask[i] = 0;
17397
437k
      continue;
17398
437k
    }
17399
305k
17400
305k
    // Not an undef or zero. If the input is something other than an
17401
305k
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17402
305k
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17403
305k
        
!isa<ConstantSDNode>(Op.getOperand(1))93.6k
)
17404
212k
      return SDValue();
17405
93.2k
    SDValue ExtractedFromVec = Op.getOperand(0);
17406
93.2k
17407
93.2k
    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17408
93.2k
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17409
2
      return SDValue();
17410
93.2k
17411
93.2k
    // All inputs must have the same element type as the output.
17412
93.2k
    if (VT.getVectorElementType() !=
17413
93.2k
        ExtractedFromVec.getValueType().getVectorElementType())
17414
11.3k
      return SDValue();
17415
81.9k
17416
81.9k
    // Have we seen this input vector before?
17417
81.9k
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17418
81.9k
    // a map back from SDValues to numbers isn't worth it.
17419
81.9k
    unsigned Idx = std::distance(
17420
81.9k
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17421
81.9k
    if (Idx == VecIn.size())
17422
17.2k
      VecIn.push_back(ExtractedFromVec);
17423
81.9k
17424
81.9k
    VectorMask[i] = Idx;
17425
81.9k
  }
17426
278k
17427
278k
  // If we didn't find at least one input vector, bail out.
17428
278k
  
if (54.7k
VecIn.size() < 254.7k
)
17429
41.9k
    return SDValue();
17430
12.8k
17431
12.8k
  // If all the Operands of BUILD_VECTOR extract from same
17432
12.8k
  // vector, then split the vector efficiently based on the maximum
17433
12.8k
  // vector access index and adjust the VectorMask and
17434
12.8k
  // VecIn accordingly.
17435
12.8k
  bool DidSplitVec = false;
17436
12.8k
  if (VecIn.size() == 2) {
17437
10.6k
    unsigned MaxIndex = 0;
17438
10.6k
    unsigned NearestPow2 = 0;
17439
10.6k
    SDValue Vec = VecIn.back();
17440
10.6k
    EVT InVT = Vec.getValueType();
17441
10.6k
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17442
10.6k
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17443
10.6k
17444
119k
    for (unsigned i = 0; i < NumElems; 
i++109k
) {
17445
109k
      if (VectorMask[i] <= 0)
17446
50.7k
        continue;
17447
58.3k
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17448
58.3k
      IndexVec[i] = Index;
17449
58.3k
      MaxIndex = std::max(MaxIndex, Index);
17450
58.3k
    }
17451
10.6k
17452
10.6k
    NearestPow2 = PowerOf2Ceil(MaxIndex);
17453
10.6k
    if (InVT.isSimple() && 
NearestPow2 > 210.4k
&&
MaxIndex < NearestPow27.05k
&&
17454
10.6k
        
NumElems * 2 < NearestPow26.50k
) {
17455
596
      unsigned SplitSize = NearestPow2 / 2;
17456
596
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17457
596
                                     InVT.getVectorElementType(), SplitSize);
17458
596
      if (TLI.isTypeLegal(SplitVT)) {
17459
410
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17460
410
                                     DAG.getConstant(SplitSize, DL, IdxTy));
17461
410
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17462
410
                                     DAG.getConstant(0, DL, IdxTy));
17463
410
        VecIn.pop_back();
17464
410
        VecIn.push_back(VecIn1);
17465
410
        VecIn.push_back(VecIn2);
17466
410
        DidSplitVec = true;
17467
410
17468
3.58k
        for (unsigned i = 0; i < NumElems; 
i++3.17k
) {
17469
3.17k
          if (VectorMask[i] <= 0)
17470
216
            continue;
17471
2.95k
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 
11.47k
:
21.48k
;
17472
2.95k
        }
17473
410
      }
17474
596
    }
17475
10.6k
  }
17476
12.8k
17477
12.8k
  // TODO: We want to sort the vectors by descending length, so that adjacent
17478
12.8k
  // pairs have similar length, and the longer vector is always first in the
17479
12.8k
  // pair.
17480
12.8k
17481
12.8k
  // TODO: Should this fire if some of the input vectors has illegal type (like
17482
12.8k
  // it does now), or should we let legalization run its course first?
17483
12.8k
17484
12.8k
  // Shuffle phase:
17485
12.8k
  // Take pairs of vectors, and shuffle them so that the result has elements
17486
12.8k
  // from these vectors in the correct places.
17487
12.8k
  // For example, given:
17488
12.8k
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17489
12.8k
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17490
12.8k
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17491
12.8k
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17492
12.8k
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17493
12.8k
  // We will generate:
17494
12.8k
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17495
12.8k
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17496
12.8k
  SmallVector<SDValue, 4> Shuffles;
17497
25.0k
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; 
++In12.2k
) {
17498
13.4k
    unsigned LeftIdx = 2 * In + 1;
17499
13.4k
    SDValue VecLeft = VecIn[LeftIdx];
17500
13.4k
    SDValue VecRight =
17501
13.4k
        (LeftIdx + 1) < VecIn.size() ? 
VecIn[LeftIdx + 1]3.12k
:
SDValue()10.3k
;
17502
13.4k
17503
13.4k
    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17504
12.2k
                                                VecRight, LeftIdx, DidSplitVec))
17505
12.2k
      Shuffles.push_back(Shuffle);
17506
1.23k
    else
17507
1.23k
      return SDValue();
17508
13.4k
  }
17509
12.8k
17510
12.8k
  // If we need the zero vector as an "ingredient" in the blend tree, add it
17511
12.8k
  // to the list of shuffles.
17512
12.8k
  
if (11.5k
UsesZeroVector11.5k
)
17513
722
    Shuffles.push_back(VT.isInteger() ? 
DAG.getConstant(0, DL, VT)523
17514
722
                                      : 
DAG.getConstantFP(0.0, DL, VT)199
);
17515
11.5k
17516
11.5k
  // If we only have one shuffle, we're done.
17517
11.5k
  if (Shuffles.size() == 1)
17518
10.2k
    return Shuffles[0];
17519
1.35k
17520
1.35k
  // Update the vector mask to point to the post-shuffle vectors.
17521
1.35k
  for (int &Vec : VectorMask)
17522
13.2k
    if (Vec == 0)
17523
2.79k
      Vec = Shuffles.size() - 1;
17524
10.4k
    else
17525
10.4k
      Vec = (Vec - 1) / 2;
17526
1.35k
17527
1.35k
  // More than one shuffle. Generate a binary tree of blends, e.g. if from
17528
1.35k
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17529
1.35k
  // generate:
17530
1.35k
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17531
1.35k
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17532
1.35k
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17533
1.35k
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17534
1.35k
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17535
1.35k
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17536
1.35k
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17537
1.35k
17538
1.35k
  // Make sure the initial size of the shuffle list is even.
17539
1.35k
  if (Shuffles.size() % 2)
17540
5
    Shuffles.push_back(DAG.getUNDEF(VT));
17541
1.35k
17542
2.72k
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; 
CurSize /= 21.36k
) {
17543
1.36k
    if (CurSize % 2) {
17544
0
      Shuffles[CurSize] = DAG.getUNDEF(VT);
17545
0
      CurSize++;
17546
0
    }
17547
2.73k
    for (unsigned In = 0, Len = CurSize / 2; In < Len; 
++In1.36k
) {
17548
1.36k
      int Left = 2 * In;
17549
1.36k
      int Right = 2 * In + 1;
17550
1.36k
      SmallVector<int, 8> Mask(NumElems, -1);
17551
14.7k
      for (unsigned i = 0; i != NumElems; 
++i13.4k
) {
17552
13.4k
        if (VectorMask[i] == Left) {
17553
6.48k
          Mask[i] = i;
17554
6.48k
          VectorMask[i] = In;
17555
6.91k
        } else if (VectorMask[i] == Right) {
17556
6.72k
          Mask[i] = i + NumElems;
17557
6.72k
          VectorMask[i] = In;
17558
6.72k
        }
17559
13.4k
      }
17560
1.36k
17561
1.36k
      Shuffles[In] =
17562
1.36k
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17563
1.36k
    }
17564
1.36k
  }
17565
1.35k
  return Shuffles[0];
17566
1.35k
}
17567
17568
// Try to turn a build vector of zero extends of extract vector elts into a
17569
// a vector zero extend and possibly an extract subvector.
17570
// TODO: Support sign extend?
17571
// TODO: Allow undef elements?
17572
518k
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17573
518k
  if (LegalOperations)
17574
239k
    return SDValue();
17575
279k
17576
279k
  EVT VT = N->getValueType(0);
17577
279k
17578
279k
  bool FoundZeroExtend = false;
17579
279k
  SDValue Op0 = N->getOperand(0);
17580
279k
  auto checkElem = [&](SDValue Op) -> int64_t {
17581
279k
    unsigned Opc = Op.getOpcode();
17582
279k
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17583
279k
    if ((Opc == ISD::ZERO_EXTEND || 
Opc == ISD::ANY_EXTEND278k
) &&
17584
279k
        
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT2.94k
&&
17585
279k
        
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)1.42k
)
17586
1.42k
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17587
1.42k
        return C->getZExtValue();
17588
278k
    return -1;
17589
278k
  };
17590
279k
17591
279k
  // Make sure the first element matches
17592
279k
  // (zext (extract_vector_elt X, C))
17593
279k
  int64_t Offset = checkElem(Op0);
17594
279k
  if (Offset < 0)
17595
278k
    return SDValue();
17596
1.39k
17597
1.39k
  unsigned NumElems = N->getNumOperands();
17598
1.39k
  SDValue In = Op0.getOperand(0).getOperand(0);
17599
1.39k
  EVT InSVT = In.getValueType().getScalarType();
17600
1.39k
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17601
1.39k
17602
1.39k
  // Don't create an illegal input type after type legalization.
17603
1.39k
  if (LegalTypes && 
!TLI.isTypeLegal(InVT)1.30k
)
17604
1.30k
    return SDValue();
17605
88
17606
88
  // Ensure all the elements come from the same vector and are adjacent.
17607
117
  
for (unsigned i = 1; 88
i != NumElems;
++i29
) {
17608
101
    if ((Offset + i) != checkElem(N->getOperand(i)))
17609
72
      return SDValue();
17610
101
  }
17611
88
17612
88
  SDLoc DL(N);
17613
16
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17614
16
                   Op0.getOperand(0).getOperand(1));
17615
16
  return DAG.getNode(FoundZeroExtend ? 
ISD::ZERO_EXTEND14
:
ISD::ANY_EXTEND2
, DL,
17616
16
                     VT, In);
17617
88
}
17618
17619
518k
// Main BUILD_VECTOR combine entry point. Tries, in order: all-undef -> undef,
// splat-of-bitcast -> concat, BUILD_VECTOR of adjacent extracts -> subvector
// extract, then the zext/ext-buildvec/shuffle reductions above. Returns a
// null SDValue when no combine applies.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index when Op extracts from the same
    // vector as operand 0; (uint64_t)-1 otherwise.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // All operands must extract consecutive elements starting at Offset.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      // The whole source vector is rebuilt verbatim - just use it.
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
17692
17693
51.0k
// Fold a CONCAT_VECTORS whose operands are all scalars bitcast to vectors
// (or undef) into a single BUILD_VECTOR of the scalars, bitcast to the
// concat's result type. Returns an empty SDValue when no fold applies.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Start with an integer scalar type of the operand width; we may switch
  // to a floating-point scalar type below if any FP source shows up.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Collect the scalar source of each concat operand, tracking whether we
  // encounter integer and/or floating-point scalars along the way.
  SmallVector<SDValue, 8> Ops;
  bool SawInt = false;
  bool SawFP = false;
  for (const SDValue &Op : N->ops()) {
    SDValue Scalar;
    if (Op.getOpcode() == ISD::BITCAST &&
        !Op.getOperand(0).getValueType().isVector())
      Scalar = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::UNDEF)
      Scalar = ScalarUndef;
    else
      return SDValue();
    Ops.push_back(Scalar);

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT ScalarVT = Scalar.getValueType();
    if (ScalarVT.isFloatingPoint())
      SawFP = true;
    else if (ScalarVT.isInteger())
      SawInt = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (SawFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (SawInt) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        Op = Op.isUndef() ? ScalarUndef : DAG.getBitcast(SVT, Op);
      }
    }
  }

  unsigned NumScalars = VT.getSizeInBits() / SVT.getSizeInBits();
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumScalars);
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
17753
17754
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
// Returns an empty SDValue when any operand is not an extract/undef, more
// than two source vectors are involved, or the resulting mask is not legal.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors of the shuffle;
  // both start as undef until claimed by an extract below.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extraction index must be a constant to be mapped into the mask.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      // Second-operand mask indices are offset by NumElts, per the
      // VECTOR_SHUFFLE mask convention.
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Ask the target whether the mask we built can be lowered as a shuffle.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
17831
17832
55.1k
/// Visit a CONCAT_VECTORS node and try a sequence of folds, in order:
/// trivial single-operand / all-undef cases, concat-of-scalar folds,
/// BUILD_VECTOR merging, shuffle formation, and identity-concat removal.
/// Returns the replacement value or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      // Only integer and FP scalars can be placed in a vector here.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      // Both the scalar type and the implied vector type must be legal.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    // Flatten every operand's elements into Opnds, truncating integer
    // elements to the common MinVT where needed.
    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    unsigned IdentityIndex = i * PartNumElem;
    if (CS->getAPIntValue() != IdentityIndex)
      return SDValue();
  }

  // Every (non-undef) operand re-extracted its own slice of SingleSource,
  // so the concat is a no-op and we can return the source directly.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
17997
17998
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
17999
// if the subvector can be sourced for free.
18000
72.5k
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
18001
72.5k
  if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18002
72.5k
      
V.getOperand(1).getValueType() == SubVT3.70k
&&
V.getOperand(2) == Index3.04k
) {
18003
2.98k
    return V.getOperand(1);
18004
2.98k
  }
18005
69.5k
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18006
69.5k
  if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18007
69.5k
      
V.getOperand(0).getValueType() == SubVT14.4k
&&
18008
69.5k
      
(IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 013.8k
) {
18009
13.8k
    uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
18010
13.8k
    return V.getOperand(SubIdx);
18011
13.8k
  }
18012
55.6k
  return SDValue();
18013
55.6k
}
18014
18015
// Narrow "extract (binop (ins ?, X, Index), (ins ?, Y, Index)), Index" to
// "binop X, Y" when both wide operands merely carry freely-sourceable
// subvectors at this index (via INSERT_SUBVECTOR or CONCAT_VECTORS).
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
                                              SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue WideOp = Extract->getOperand(0);
  unsigned Opc = WideOp.getOpcode();
  // Only single-result binary operators are handled.
  if (!TLI.isBinOp(Opc) || WideOp.getNode()->getNumValues() != 1)
    return SDValue();

  SDValue Index = Extract->getOperand(1);
  EVT SubVT = Extract->getValueType(0);

  // See whether each wide operand provides a SubVT-sized piece at this index
  // for free.
  SDValue LHS = getSubVectorSrc(WideOp.getOperand(0), Index, SubVT);
  SDValue RHS = getSubVectorSrc(WideOp.getOperand(1), Index, SubVT);

  // TODO: We could handle the case where only 1 operand is being inserted by
  //       creating an extract of the other operand, but that requires checking
  //       number of uses and/or costs.
  if (!LHS || !RHS)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(Opc, SubVT))
    return SDValue();

  // We are inserting both operands of the wide binop only to extract back
  // to the narrow vector size. Eliminate all of the insert/extract:
  // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
  return DAG.getNode(Opc, SDLoc(Extract), SubVT, LHS, RHS,
                     WideOp->getFlags());
}
18041
18042
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
/// Two transforms are attempted: (1) narrow both binop operands with fresh
/// extracts when extraction is cheap, and (2) for 2x-wide AND/OR/XOR whose
/// operands are 2-way concats, pick out the matching concat halves directly.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
    return V;

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  unsigned BOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
    return SDValue();

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  // ConcatOpNum: which NarrowBVT-sized chunk of the wide value we extract;
  // ExtBOIdx: the equivalent element index in WideBVT's element units.
  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
      return V.getOperand(ConcatOpNum);
    return SDValue();
  };
  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));

  if (SubVecL || SubVecR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDLoc DL(Extract);
    SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);

    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);

    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}
18159
18160
/// If we are extracting a subvector from a wide vector load, convert to a
18161
/// narrow load to eliminate the extraction:
18162
/// (extract_subvector (load wide vector)) --> (load narrow vector)
18163
168k
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18164
168k
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
18165
168k
  if (DAG.getDataLayout().isBigEndian())
18166
566
    return SDValue();
18167
167k
18168
167k
  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18169
167k
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18170
167k
  if (!Ld || 
Ld->getExtensionType()13.6k
||
Ld->isVolatile()13.4k
||
!ExtIdx13.4k
)
18171
154k
    return SDValue();
18172
13.4k
18173
13.4k
  // Allow targets to opt-out.
18174
13.4k
  EVT VT = Extract->getValueType(0);
18175
13.4k
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18176
13.4k
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18177
9.89k
    return SDValue();
18178
3.54k
18179
3.54k
  // The narrow load will be offset from the base address of the old load if
18180
3.54k
  // we are extracting from something besides index 0 (little-endian).
18181
3.54k
  SDLoc DL(Extract);
18182
3.54k
  SDValue BaseAddr = Ld->getOperand(1);
18183
3.54k
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18184
3.54k
18185
3.54k
  // TODO: Use "BaseIndexOffset" to make this more effective.
18186
3.54k
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18187
3.54k
  MachineFunction &MF = DAG.getMachineFunction();
18188
3.54k
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18189
3.54k
                                                   VT.getStoreSize());
18190
3.54k
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18191
3.54k
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18192
3.54k
  return NewLd;
18193
3.54k
}
18194
18195
176k
/// Visit an EXTRACT_SUBVECTOR node and try a sequence of folds, in order:
/// extract-of-undef, narrowed load, extract-of-extract, moving a bitcast
/// past the extract, extract-of-concat, shrinking a BUILD_VECTOR source,
/// extract-of-insert, narrowed binop, and demanded-elements simplification.
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  // (extract_subvector (load wide)) --> (load narrow), when the target
  // supports loads of the narrow type.
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine an extract of an extract into a single extract_subvector.
  // ext (ext X, C), 0 --> ext X, C
  SDValue Index = N->getOperand(1);
  if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
                                    V.getConstantOperandVal(1)) &&
        TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
                         V.getOperand(1));
    }
  }

  // Try to move vector bitcast after extract_subv by scaling extraction index:
  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
  if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
      V.getOperand(0).getValueType().isVector()) {
    SDValue SrcOp = V.getOperand(0);
    EVT SrcVT = SrcOp.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorNumElements();
    unsigned DestNumElts = V.getValueType().getVectorNumElements();
    if ((SrcNumElts % DestNumElts) == 0) {
      // The bitcast splits each source element into SrcDestRatio elements,
      // so both the extract width and index scale by that ratio.
      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
      unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
      EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                                      NewExtNumElts);
      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
        unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
        SDLoc DL(N);
        SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                                         V.getOperand(0), NewIndex);
        return DAG.getBitcast(NVT, NewExtract);
      }
    }
    // TODO - handle (DestNumElts % SrcNumElts) == 0
  }

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
      V.getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  V = peekThroughBitcasts(V);

  // If the input is a build vector. Try to make a smaller build vector.
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
      EVT InVT = V.getValueType();
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
        EVT EltVT = InVT.getVectorElementType();
        EVT ExtractVT = NumElems == 1 ? EltVT
                                      : EVT::getVectorVT(*DAG.getContext(),
                                                         EltVT, NumElems);
        // After DAG legalization, only form nodes the target declared legal.
        if ((Level < AfterLegalizeDAG ||
             (NumElems == 1 ||
              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          // Rescale the extract index from NVT elements to InVT elements,
          // since V may have been bitcast from a different element width.
          unsigned IdxVal = IdxC->getZExtValue();
          IdxVal *= NVT.getScalarSizeInBits();
          IdxVal /= EltSize;

          if (NumElems == 1) {
            SDValue Src = V->getOperand(IdxVal);
            if (EltVT != Src.getValueType())
              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
            return DAG.getBitcast(NVT, Src);
          }

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(
              ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
          return DAG.getBitcast(NVT, BuildVec);
        }
      }
    }
  }

  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V.getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
    auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V.getOperand(1));
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
          Index);
    }
  }

  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  // If simplification touched N in place, return it to signal a change.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
18333
18334
/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18335
/// followed by concatenation. Narrow vector ops may have better performance
18336
/// than wide ops, and this can unlock further narrowing of other vector ops.
18337
/// Targets can invert this transform later if it is not profitable.
18338
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
                                         SelectionDAG &DAG) {
  // Match only: shuffle (concat X, undef), (concat Y, undef), Mask
  // Both operands must be 2-operand concats whose high halves are undef.
  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
      N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
      !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
    return SDValue();

  // Split the wide shuffle mask into halves. Any mask element that is accessing
  // operand 1 is offset down to account for narrowing of the vectors.
  // A mask element < NumElts indexes into X (the low half of operand 0) and is
  // unchanged; an element >= NumElts indexed into operand 1, whose defined
  // elements start at HalfNumElts lower once the vectors are narrowed.
  ArrayRef<int> Mask = Shuf->getMask();
  EVT VT = Shuf->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfNumElts = NumElts / 2;
  SmallVector<int, 16> Mask0(HalfNumElts, -1);
  SmallVector<int, 16> Mask1(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue;
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    if (i < HalfNumElts)
      Mask0[i] = M;
    else
      Mask1[i - HalfNumElts] = M;
  }

  // Ask the target if this is a valid transform.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                HalfNumElts);
  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
      !TLI.isShuffleMaskLegal(Mask1, HalfVT))
    return SDValue();

  // shuffle (concat X, undef), (concat Y, undef), Mask -->
  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
  SDLoc DL(Shuf);
  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}
18380
18381
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18382
// or turn a shuffle of a single concat into simpler shuffle then concat.
18383
3.38k
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  // NOTE: the caller guarantees N0 is a CONCAT_VECTORS, so operand 0 exists
  // and gives us the per-subvector type.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // Make sure we're dealing with a copy.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      // Entirely-undef submask becomes an undef subvector in the concat.
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    // Every defined mask element must select element i of exactly one source
    // subvector (OpIdx); mixing subvectors or permuting lanes means this is
    // not a straight copy, so bail.
    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");

    // OpIdx counts subvectors across both concat inputs: first N0's operands,
    // then N1's.
    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
18445
18446
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18447
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18448
//
18449
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18450
// a simplification in some sense, but it isn't appropriate in general: some
18451
// BUILD_VECTORs are substantially cheaper than others. The general case
18452
// of a BUILD_VECTOR requires inserting each element individually (or
18453
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18454
// all constants is a single constant pool load.  A BUILD_VECTOR where each
18455
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
18456
// are undef lowers to a small number of element insertions.
18457
//
18458
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18459
// We don't fold shuffles where one side is a non-zero constant, and we don't
18460
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18461
// non-constant operands. This seems to work out reasonably well in practice.
18462
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // If N0 has other users, folding it away here could duplicate work.
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Gather one scalar operand per mask element; undef mask elements become
  // undef scalars of the result's element type.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask values >= NumElts select from N1; fold the index down.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // SCALAR_TO_VECTOR defines only element 0; all others are undef.
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zero-extension when the target says it is free.
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
18538
18539
// Match shuffles that can be converted to any_vector_extend_in_reg.
18540
// This is often generated during legalization.
18541
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18542
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18543
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  // For a given Scale, every mask element must either be undef or place
  // source element i/Scale at position i (with i a multiple of Scale).
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non power of 2 vector sizes
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    // Never create an illegal type. Only create unsupported operations if we
    // are pre-legalization.
    if (TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        // The extend result has wider elements; bitcast back to the shuffle's
        // original type.
        return DAG.getBitcast(VT,
                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
                                          SDLoc(SVN), OutVT, N0));
  }

  return SDValue();
}
18594
18595
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18596
// each source element of a large type into the lowest elements of a smaller
18597
// destination type. This is often generated during legalization.
18598
// If the source node itself was a '*_extend_vector_inreg' node then we should
18599
// then be able to remove it.
18600
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  // Only fire when the shuffle's source is itself a *_extend_vector_inreg:
  // then truncating back may cancel the extension entirely.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
18655
18656
// Combine shuffles of splat-shuffles of the form:
18657
// shuffle (shuffle V, undef, splat-mask), undef, M
18658
// If splat-mask contains undef elements, we need to be careful about
18659
// introducing undef's in the folded mask which are not the result of composing
18660
// the masks of the shuffles.
18661
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  // Only handle the unary case: shuffle (splat-shuffle), undef, M.
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();
  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  // Rebuild a shuffle directly on the splat's sources, skipping the
  // intermediate splat-shuffle node.
  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
18710
18711
/// If the shuffle mask is taking exactly one element from the first vector
18712
/// operand and passing through all other elements from the second vector
18713
/// operand, return the index of the mask element that is choosing an element
18714
/// from the first operand. Otherwise, return -1.
18715
251k
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
  // TODO: This does not match if there are undef elements in the shuffle mask.
  // Should we ignore undefs in the shuffle mask instead? The trade-off is
  // removing an instruction (a shuffle), but losing the knowledge that some
  // vector lanes are not needed.
  const int NumMaskElts = Mask.size();
  int Op0EltIndex = -1;
  for (int Idx = 0; Idx != NumMaskElts; ++Idx) {
    const int M = Mask[Idx];
    if (0 <= M && M < NumMaskElts) {
      // This lane selects from operand 0. Only a single such lane is allowed.
      if (Op0EltIndex != -1)
        return -1;
      Op0EltIndex = Idx;
      continue;
    }
    // Every other lane (including undef, which is -1) must be an identity
    // selection from operand 1 - nothing from operand 1 may change lanes.
    if (M != Idx + NumMaskElts)
      return -1;
  }
  return Op0EltIndex;
}
18735
18736
/// If a shuffle inserts exactly one element from a source vector operand into
18737
/// another vector operand and we can access the specified element as a scalar,
18738
/// then we can eliminate the shuffle.
18739
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert's index must be constant and must match the vector element the
  // shuffle mask actually reads from Op0.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
18788
18789
/// If we have a unary shuffle of a shuffle, see if it can be folded away
18790
/// completely. This has the potential to lose undef knowledge because the first
18791
/// shuffle may not have an undef mask element where the second one does. So
18792
/// only call this after doing simplifications based on demanded elements.
18793
116k
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
  // Match the unary pattern: shuf (shuf0 X, Y, InnerMask), undef, OuterMask.
  auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!InnerShuf || !Shuf->getOperand(1).isUndef())
    return SDValue();

  ArrayRef<int> OuterMask = Shuf->getMask();
  ArrayRef<int> InnerMask = InnerShuf->getMask();
  const int NumElts = OuterMask.size();
  for (int Elt = 0; Elt != NumElts; ++Elt) {
    const int OuterIdx = OuterMask[Elt];
    // Ignore undef elements.
    if (OuterIdx == -1)
      continue;
    assert(OuterIdx >= 0 && OuterIdx < NumElts &&
           "Unexpected shuffle mask value");

    // The outer shuffle is a no-op for this lane only if the inner element it
    // selects is the same one the inner shuffle already placed in this lane.
    if (InnerMask[OuterIdx] != InnerMask[Elt])
      return SDValue();
  }

  // Every element of this shuffle is identical to the result of the previous
  // shuffle, so we can replace this value.
  return Shuf->getOperand(0);
}
18816
18817
128k
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18818
128k
  EVT VT = N->getValueType(0);
18819
128k
  unsigned NumElts = VT.getVectorNumElements();
18820
128k
18821
128k
  SDValue N0 = N->getOperand(0);
18822
128k
  SDValue N1 = N->getOperand(1);
18823
128k
18824
128k
  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18825
128k
18826
128k
  // Canonicalize shuffle undef, undef -> undef
18827
128k
  if (N0.isUndef() && 
N1.isUndef()243
)
18828
16
    return DAG.getUNDEF(VT);
18829
128k
18830
128k
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18831
128k
18832
128k
  // Canonicalize shuffle v, v -> v, undef
18833
128k
  if (N0 == N1) {
18834
35
    SmallVector<int, 8> NewMask;
18835
227
    for (unsigned i = 0; i != NumElts; 
++i192
) {
18836
192
      int Idx = SVN->getMaskElt(i);
18837
192
      if (Idx >= (int)NumElts) 
Idx -= NumElts81
;
18838
192
      NewMask.push_back(Idx);
18839
192
    }
18840
35
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18841
35
  }
18842
128k
18843
128k
  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18844
128k
  if (N0.isUndef())
18845
227
    return DAG.getCommutedVectorShuffle(*SVN);
18846
127k
18847
127k
  // Remove references to rhs if it is undef
18848
127k
  if (N1.isUndef()) {
18849
84.7k
    bool Changed = false;
18850
84.7k
    SmallVector<int, 8> NewMask;
18851
1.34M
    for (unsigned i = 0; i != NumElts; 
++i1.25M
) {
18852
1.25M
      int Idx = SVN->getMaskElt(i);
18853
1.25M
      if (Idx >= (int)NumElts) {
18854
1.94k
        Idx = -1;
18855
1.94k
        Changed = true;
18856
1.94k
      }
18857
1.25M
      NewMask.push_back(Idx);
18858
1.25M
    }
18859
84.7k
    if (Changed)
18860
454
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18861
127k
  }
18862
127k
18863
127k
  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18864
352
    return InsElt;
18865
127k
18866
127k
  // A shuffle of a single vector that is a splatted value can always be folded.
18867
127k
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18868
122
    return V;
18869
126k
18870
126k
  // If it is a splat, check if the argument vector is another splat or a
18871
126k
  // build_vector.
18872
126k
  if (SVN->isSplat() && 
SVN->getSplatIndex() < (int)NumElts31.4k
) {
18873
31.4k
    int SplatIndex = SVN->getSplatIndex();
18874
31.4k
    if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18875
31.4k
        
TLI.isBinOp(N0.getOpcode())2.10k
&&
N0.getNode()->getNumValues() == 148
) {
18876
48
      // splat (vector_bo L, R), Index -->
18877
48
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18878
48
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18879
48
      SDLoc DL(N);
18880
48
      EVT EltVT = VT.getScalarType();
18881
48
      SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18882
48
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18883
48
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18884
48
      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18885
48
                                  N0.getNode()->getFlags());
18886
48
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18887
48
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18888
48
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18889
48
    }
18890
31.4k
18891
31.4k
    // If this is a bit convert that changes the element type of the vector but
18892
31.4k
    // not the number of vector elements, look through it.  Be careful not to
18893
31.4k
    // look though conversions that change things like v4f32 to v2f64.
18894
31.4k
    SDNode *V = N0.getNode();
18895
31.4k
    if (V->getOpcode() == ISD::BITCAST) {
18896
2.23k
      SDValue ConvInput = V->getOperand(0);
18897
2.23k
      if (ConvInput.getValueType().isVector() &&
18898
2.23k
          
ConvInput.getValueType().getVectorNumElements() == NumElts2.17k
)
18899
77
        V = ConvInput.getNode();
18900
2.23k
    }
18901
31.4k
18902
31.4k
    if (V->getOpcode() == ISD::BUILD_VECTOR) {
18903
7.09k
      assert(V->getNumOperands() == NumElts &&
18904
7.09k
             "BUILD_VECTOR has wrong number of operands");
18905
7.09k
      SDValue Base;
18906
7.09k
      bool AllSame = true;
18907
7.13k
      for (unsigned i = 0; i != NumElts; 
++i45
) {
18908
7.13k
        if (!V->getOperand(i).isUndef()) {
18909
7.09k
          Base = V->getOperand(i);
18910
7.09k
          break;
18911
7.09k
        }
18912
7.13k
      }
18913
7.09k
      // Splat of <u, u, u, u>, return <u, u, u, u>
18914
7.09k
      if (!Base.getNode())
18915
0
        return N0;
18916
14.1k
      
for (unsigned i = 0; 7.09k
i != NumElts;
++i7.08k
) {
18917
14.1k
        if (V->getOperand(i) != Base) {
18918
7.08k
          AllSame = false;
18919
7.08k
          break;
18920
7.08k
        }
18921
14.1k
      }
18922
7.09k
      // Splat of <x, x, x, x>, return <x, x, x, x>
18923
7.09k
      if (AllSame)
18924
4
        return N0;
18925
7.08k
18926
7.08k
      // Canonicalize any other splat as a build_vector.
18927
7.08k
      SDValue Splatted = V->getOperand(SplatIndex);
18928
7.08k
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18929
7.08k
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18930
7.08k
18931
7.08k
      // We may have jumped through bitcasts, so the type of the
18932
7.08k
      // BUILD_VECTOR may not match the type of the shuffle.
18933
7.08k
      if (V->getValueType(0) != VT)
18934
0
        NewBV = DAG.getBitcast(VT, NewBV);
18935
7.08k
      return NewBV;
18936
7.08k
    }
18937
31.4k
  }
18938
119k
18939
119k
  // Simplify source operands based on shuffle mask.
18940
119k
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18941
3.01k
    return SDValue(N, 0);
18942
116k
18943
116k
  // This is intentionally placed after demanded elements simplification because
18944
116k
  // it could eliminate knowledge of undef elements created by this shuffle.
18945
116k
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18946
12
    return ShufOp;
18947
116k
18948
116k
  // Match shuffles that can be converted to any_vector_extend_in_reg.
18949
116k
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18950
526
    return V;
18951
116k
18952
116k
  // Combine "truncate_vector_in_reg" style shuffles.
18953
116k
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
18954
7
    return V;
18955
116k
18956
116k
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18957
116k
      
Level < AfterLegalizeVectorOps3.99k
&&
18958
116k
      
(3.80k
N1.isUndef()3.80k
||
18959
3.80k
      
(1.28k
N1.getOpcode() == ISD::CONCAT_VECTORS1.28k
&&
18960
3.38k
       
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()864
))) {
18961
3.38k
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
18962
1.82k
      return V;
18963
114k
  }
18964
114k
18965
114k
  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18966
114k
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18967
114k
  if (Level < AfterLegalizeDAG && 
TLI.isTypeLegal(VT)111k
)
18968
103k
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18969
802
      return Res;
18970
113k
18971
113k
  // If this shuffle only has a single input that is a bitcasted shuffle,
18972
113k
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18973
113k
  // back to their original types.
18974
113k
  if (N0.getOpcode() == ISD::BITCAST && 
N0.hasOneUse()24.2k
&&
18975
113k
      
N1.isUndef()22.2k
&&
Level < AfterLegalizeVectorOps10.9k
&&
18976
113k
      
TLI.isTypeLegal(VT)4.84k
) {
18977
4.79k
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18978
198
      if (Scale == 1)
18979
99
        return SmallVector<int, 8>(Mask.begin(), Mask.end());
18980
99
18981
99
      SmallVector<int, 8> NewMask;
18982
99
      for (int M : Mask)
18983
1.61k
        
for (int s = 0; 412
s != Scale;
++s1.20k
)
18984
1.20k
          NewMask.push_back(M < 0 ? 
-1124
:
Scale * M + s1.07k
);
18985
99
      return NewMask;
18986
99
    };
18987
4.79k
18988
4.79k
    SDValue BC0 = peekThroughOneUseBitcasts(N0);
18989
4.79k
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && 
BC0.hasOneUse()99
) {
18990
99
      EVT SVT = VT.getScalarType();
18991
99
      EVT InnerVT = BC0->getValueType(0);
18992
99
      EVT InnerSVT = InnerVT.getScalarType();
18993
99
18994
99
      // Determine which shuffle works with the smaller scalar type.
18995
99
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? 
VT70
:
InnerVT29
;
18996
99
      EVT ScaleSVT = ScaleVT.getScalarType();
18997
99
18998
99
      if (TLI.isTypeLegal(ScaleVT) &&
18999
99
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19000
99
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19001
99
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19002
99
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19003
99
19004
99
        // Scale the shuffle masks to the smaller scalar type.
19005
99
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19006
99
        SmallVector<int, 8> InnerMask =
19007
99
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19008
99
        SmallVector<int, 8> OuterMask =
19009
99
            ScaleShuffleMask(SVN->getMask(), OuterScale);
19010
99
19011
99
        // Merge the shuffle masks.
19012
99
        SmallVector<int, 8> NewMask;
19013
99
        for (int M : OuterMask)
19014
1.20k
          NewMask.push_back(M < 0 ? 
-1202
:
InnerMask[M]998
);
19015
99
19016
99
        // Test for shuffle mask legality over both commutations.
19017
99
        SDValue SV0 = BC0->getOperand(0);
19018
99
        SDValue SV1 = BC0->getOperand(1);
19019
99
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19020
99
        if (!LegalMask) {
19021
2
          std::swap(SV0, SV1);
19022
2
          ShuffleVectorSDNode::commuteMask(NewMask);
19023
2
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19024
2
        }
19025
99
19026
99
        if (LegalMask) {
19027
97
          SV0 = DAG.getBitcast(ScaleVT, SV0);
19028
97
          SV1 = DAG.getBitcast(ScaleVT, SV1);
19029
97
          return DAG.getBitcast(
19030
97
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19031
97
        }
19032
113k
      }
19033
99
    }
19034
4.79k
  }
19035
113k
19036
113k
  // Canonicalize shuffles according to rules:
19037
113k
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19038
113k
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19039
113k
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19040
113k
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19041
113k
      
N0.getOpcode() != ISD::VECTOR_SHUFFLE3.37k
&&
Level < AfterLegalizeDAG1.15k
&&
19042
113k
      
TLI.isTypeLegal(VT)1.04k
) {
19043
1.03k
    // The incoming shuffle must be of the same type as the result of the
19044
1.03k
    // current shuffle.
19045
1.03k
    assert(N1->getOperand(0).getValueType() == VT &&
19046
1.03k
           "Shuffle types don't match");
19047
1.03k
19048
1.03k
    SDValue SV0 = N1->getOperand(0);
19049
1.03k
    SDValue SV1 = N1->getOperand(1);
19050
1.03k
    bool HasSameOp0 = N0 == SV0;
19051
1.03k
    bool IsSV1Undef = SV1.isUndef();
19052
1.03k
    if (HasSameOp0 || 
IsSV1Undef975
||
N0 == SV1342
)
19053
696
      // Commute the operands of this shuffle so that next rule
19054
696
      // will trigger.
19055
696
      return DAG.getCommutedVectorShuffle(*SVN);
19056
112k
  }
19057
112k
19058
112k
  // Try to fold according to rules:
19059
112k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19060
112k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19061
112k
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19062
112k
  // Don't try to fold shuffles with illegal type.
19063
112k
  // Only fold if this shuffle is the only user of the other shuffle.
19064
112k
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && 
N->isOnlyUserOf(N0.getNode())7.39k
&&
19065
112k
      
Level < AfterLegalizeDAG5.50k
&&
TLI.isTypeLegal(VT)5.22k
) {
19066
5.14k
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19067
5.14k
19068
5.14k
    // Don't try to fold splats; they're likely to simplify somehow, or they
19069
5.14k
    // might be free.
19070
5.14k
    if (OtherSV->isSplat())
19071
117
      return SDValue();
19072
5.02k
19073
5.02k
    // The incoming shuffle must be of the same type as the result of the
19074
5.02k
    // current shuffle.
19075
5.02k
    assert(OtherSV->getOperand(0).getValueType() == VT &&
19076
5.02k
           "Shuffle types don't match");
19077
5.02k
19078
5.02k
    SDValue SV0, SV1;
19079
5.02k
    SmallVector<int, 4> Mask;
19080
5.02k
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
19081
5.02k
    // operand, and SV1 as the second operand.
19082
51.6k
    for (unsigned i = 0; i != NumElts; 
++i46.6k
) {
19083
47.9k
      int Idx = SVN->getMaskElt(i);
19084
47.9k
      if (Idx < 0) {
19085
12.8k
        // Propagate Undef.
19086
12.8k
        Mask.push_back(Idx);
19087
12.8k
        continue;
19088
12.8k
      }
19089
35.0k
19090
35.0k
      SDValue CurrentVec;
19091
35.0k
      if (Idx < (int)NumElts) {
19092
22.4k
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
19093
22.4k
        // shuffle mask to identify which vector is actually referenced.
19094
22.4k
        Idx = OtherSV->getMaskElt(Idx);
19095
22.4k
        if (Idx < 0) {
19096
772
          // Propagate Undef.
19097
772
          Mask.push_back(Idx);
19098
772
          continue;
19099
772
        }
19100
21.7k
19101
21.7k
        CurrentVec = (Idx < (int) NumElts) ? 
OtherSV->getOperand(0)17.2k
19102
21.7k
                                           : 
OtherSV->getOperand(1)4.48k
;
19103
21.7k
      } else {
19104
12.5k
        // This shuffle index references an element within N1.
19105
12.5k
        CurrentVec = N1;
19106
12.5k
      }
19107
35.0k
19108
35.0k
      // Simple case where 'CurrentVec' is UNDEF.
19109
35.0k
      
if (34.2k
CurrentVec.isUndef()34.2k
) {
19110
0
        Mask.push_back(-1);
19111
0
        continue;
19112
0
      }
19113
34.2k
19114
34.2k
      // Canonicalize the shuffle index. We don't know yet if CurrentVec
19115
34.2k
      // will be the first or second operand of the combined shuffle.
19116
34.2k
      Idx = Idx % NumElts;
19117
34.2k
      if (!SV0.getNode() || 
SV0 == CurrentVec29.2k
) {
19118
18.5k
        // Ok. CurrentVec is the left hand side.
19119
18.5k
        // Update the mask accordingly.
19120
18.5k
        SV0 = CurrentVec;
19121
18.5k
        Mask.push_back(Idx);
19122
18.5k
        continue;
19123
18.5k
      }
19124
15.7k
19125
15.7k
      // Bail out if we cannot convert the shuffle pair into a single shuffle.
19126
15.7k
      if (SV1.getNode() && 
SV1 != CurrentVec12.4k
)
19127
1.26k
        return SDValue();
19128
14.4k
19129
14.4k
      // Ok. CurrentVec is the right hand side.
19130
14.4k
      // Update the mask accordingly.
19131
14.4k
      SV1 = CurrentVec;
19132
14.4k
      Mask.push_back(Idx + NumElts);
19133
14.4k
    }
19134
5.02k
19135
5.02k
    // Check if all indices in Mask are Undef. In case, propagate Undef.
19136
5.02k
    bool isUndefMask = true;
19137
8.74k
    for (unsigned i = 0; i != NumElts && isUndefMask; 
++i4.98k
)
19138
4.98k
      isUndefMask &= Mask[i] < 0;
19139
3.76k
19140
3.76k
    if (isUndefMask)
19141
0
      return DAG.getUNDEF(VT);
19142
3.76k
19143
3.76k
    if (!SV0.getNode())
19144
0
      SV0 = DAG.getUNDEF(VT);
19145
3.76k
    if (!SV1.getNode())
19146
1.78k
      SV1 = DAG.getUNDEF(VT);
19147
3.76k
19148
3.76k
    // Avoid introducing shuffles with illegal mask.
19149
3.76k
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
19150
4
      ShuffleVectorSDNode::commuteMask(Mask);
19151
4
19152
4
      if (!TLI.isShuffleMaskLegal(Mask, VT))
19153
4
        return SDValue();
19154
0
19155
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19156
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19157
0
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19158
0
      std::swap(SV0, SV1);
19159
0
    }
19160
3.76k
19161
3.76k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19162
3.76k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19163
3.76k
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19164
3.76k
    
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask)3.76k
;
19165
107k
  }
19166
107k
19167
107k
  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19168
146
    return V;
19169
107k
19170
107k
  return SDValue();
19171
107k
}
19172
19173
41.8k
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only a constant extract index lets us build the single-element shuffle
    // mask below.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Mask selecting only element Elt into lane 0; all other lanes undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (the extracted scalar is wider than
      // the result's element type), do the truncate here as long as the
      // narrow scalar type is legal, then retry as SCALAR_TO_VECTOR of the
      // truncated value. (Original comment was truncated: "if it's not
      // legal, this should" — presumably fall through to the shuffle path
      // below; NOTE(review): confirm intent.)
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // Same element type and the source vector is at least as wide: build a
      // shuffle that moves the wanted element into lane 0, provided the
      // target accepts that mask.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector: extract the low subvector of
        // the shuffle result with the requested number of elements.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
19223
19224
54.7k
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0); // destination vector
  SDValue N1 = N->getOperand(1); // subvector being inserted
  SDValue N2 = N->getOperand(2); // insertion index

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract (requires matching index and type).
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Only safe when element types agree and the element count is unchanged,
    // so N2 still indexes the same lanes.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  // The inner insert is fully overwritten, so it can be dropped.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  // Everything below requires a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Push subvector bitcasts to the output, adjusting the index as we go.
  // insert_subvector(bitcast(v), bitcast(s), c1)
  // -> bitcast(insert_subvector(v, s, c2))
  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
      N1.getOpcode() == ISD::BITCAST) {
    SDValue N0Src = peekThroughBitcasts(N0);
    SDValue N1Src = peekThroughBitcasts(N1);
    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
      EVT NewVT;
      SDLoc DL(N);
      SDValue NewIdx;
      MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      LLVMContext &Ctx = *DAG.getContext();
      unsigned NumElts = VT.getVectorNumElements();
      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      // Rescale the result type and insertion index to the source scalar
      // type. Two cases: the result element evenly contains the source
      // element, or vice versa (the latter also requires the index and
      // element count to stay integral after scaling).
      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
        NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
          NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
        }
      }
      // NewIdx doubles as the "rescaling succeeded" flag; NewVT is only
      // valid when NewIdx is set.
      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
        SDValue Res = DAG.getBitcast(NewVT, N0Src);
        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  // i.e. order nested inserts so the larger index is innermost.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
19367
19368
5.10k
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  // Narrowing a value that was just widened from fp16 yields the original
  // fp16 bits, so the round trip can be elided.
  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() == ISD::FP16_TO_FP)
    return Src.getOperand(0);

  return SDValue();
}
19377
19378
7.31k
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  // A mask keeping exactly the low 16 bits ahead of the conversion is
  // redundant and can be stripped. Guard clauses keep the common miss cheap.
  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::AND)
    return SDValue();

  ConstantSDNode *MaskC = getAsNonOpaqueConstant(Src.getOperand(1));
  if (!MaskC || MaskC->getAPIntValue() != 0xffff)
    return SDValue();

  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                     Src.getOperand(0));
}
19392
19393
1.12k
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();
  unsigned Opc = N->getOpcode();

  // VECREDUCE over a 1-element vector is just an extract of that element,
  // any-extended if the reduction result type is wider.
  if (SrcVT.getVectorNumElements() == 1) {
    SDLoc DL(N);
    SDValue ZeroIdx =
        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                              SrcVT.getVectorElementType(), Src, ZeroIdx);
    if (Elt.getValueType() != N->getValueType(0))
      Elt = DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Elt);
    return Elt;
  }

  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
  // "Boolean" here means every element is all-sign-bits, which is what the
  // ComputeNumSignBits check establishes.
  if (Opc == ISD::VECREDUCE_AND || Opc == ISD::VECREDUCE_OR) {
    unsigned MinMaxOpc = (Opc == ISD::VECREDUCE_AND) ? ISD::VECREDUCE_UMIN
                                                     : ISD::VECREDUCE_UMAX;
    if (!TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
        TLI.isOperationLegalOrCustom(MinMaxOpc, SrcVT) &&
        DAG.ComputeNumSignBits(Src) == SrcVT.getScalarSizeInBits())
      return DAG.getNode(MinMaxOpc, SDLoc(N), N->getValueType(0), Src);
  }

  return SDValue();
}
19422
19423
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a constant vector we can inspect element-wise.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting each mask element into
  // Split sub-elements and checking to see if each is all zeros or all ones
  // - suitable for shuffle masking. Returns an empty SDValue if any
  // sub-element is a mix of set and clear bits (or non-constant), or if the
  // target rejects the resulting clear mask.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        // Undef mask element -> undef shuffle lane.
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask; sub-element
      // order within an element depends on endianness.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS lane (index i); all-zeros selects the
      // corresponding lane of the zero vector (index i + NumSubElts).
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try progressively finer splits until one produces a legal clear mask.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
19513
19514
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  //       as scalars, then there may be no extract cost. Should we ask if
  //       inserting a scalar back into a vector is cheap instead?
  // Both operands must splat from the same lane index, with matching element
  // types, and both the element extract and the scalar opcode must be
  // acceptable to the target.
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !TLI.isExtractVecEltCheap(VT, Index0) ||
      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
    return SDValue();

  // Extract the splatted scalars and perform the binop once, preserving the
  // original node's fast-math/nuw-style flags.
  SDLoc DL(N);
  SDValue IndexC =
      DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // If all lanes but 1 are undefined, no need to splat the scalar result.
  // TODO: Keep track of undefs and use that info in the general case.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
    Ops[Index0] = ScalarBO;
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
  return DAG.getBuildVector(VT, DL, Ops);
}
19560
19561
/// Visit a binary vector operation, like ADD.
19562
571k
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19563
571k
  assert(N->getValueType(0).isVector() &&
19564
571k
         "SimplifyVBinOp only works on vectors!");
19565
571k
19566
571k
  SDValue LHS = N->getOperand(0);
19567
571k
  SDValue RHS = N->getOperand(1);
19568
571k
  SDValue Ops[] = {LHS, RHS};
19569
571k
  EVT VT = N->getValueType(0);
19570
571k
  unsigned Opcode = N->getOpcode();
19571
571k
19572
571k
  // See if we can constant fold the vector operation.
19573
571k
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19574
700
          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19575
700
    return Fold;
19576
571k
19577
571k
  // Move unary shuffles with identical masks after a vector binop:
19578
571k
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19579
571k
  //   --> shuffle (VBinOp A, B), Undef, Mask
19580
571k
  // This does not require type legality checks because we are creating the
19581
571k
  // same types of operations that are in the original sequence. We do have to
19582
571k
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19583
571k
  // though. This code is adapted from the identical transform in instcombine.
19584
571k
  if (Opcode != ISD::UDIV && 
Opcode != ISD::SDIV570k
&&
19585
571k
      
Opcode != ISD::UREM570k
&&
Opcode != ISD::SREM570k
&&
19586
571k
      
Opcode != ISD::UDIVREM570k
&&
Opcode != ISD::SDIVREM570k
) {
19587
570k
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19588
570k
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19589
570k
    if (Shuf0 && 
Shuf18.69k
&&
Shuf0->getMask().equals(Shuf1->getMask())3.44k
&&
19590
570k
        
LHS.getOperand(1).isUndef()693
&&
RHS.getOperand(1).isUndef()203
&&
19591
570k
        
(203
LHS.hasOneUse()203
||
RHS.hasOneUse()104
||
LHS == RHS102
)) {
19592
139
      SDLoc DL(N);
19593
139
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19594
139
                                     RHS.getOperand(0), N->getFlags());
19595
139
      SDValue UndefV = LHS.getOperand(1);
19596
139
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19597
139
    }
19598
571k
  }
19599
571k
19600
571k
  // The following pattern is likely to emerge with vector reduction ops. Moving
19601
571k
  // the binary operation ahead of insertion may allow using a narrower vector
19602
571k
  // instruction that has better performance than the wide version of the op:
19603
571k
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19604
571k
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && 
LHS.getOperand(0).isUndef()468
&&
19605
571k
      
RHS.getOpcode() == ISD::INSERT_SUBVECTOR58
&&
RHS.getOperand(0).isUndef()10
&&
19606
571k
      
LHS.getOperand(2) == RHS.getOperand(2)10
&&
19607
571k
      
(10
LHS.hasOneUse()10
||
RHS.hasOneUse()0
)) {
19608
10
    SDValue X = LHS.getOperand(1);
19609
10
    SDValue Y = RHS.getOperand(1);
19610
10
    SDValue Z = LHS.getOperand(2);
19611
10
    EVT NarrowVT = X.getValueType();
19612
10
    if (NarrowVT == Y.getValueType() &&
19613
10
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19614
10
      // (binop undef, undef) may not return undef, so compute that result.
19615
10
      SDLoc DL(N);
19616
10
      SDValue VecC =
19617
10
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19618
10
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19619
10
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19620
10
    }
19621
571k
  }
19622
571k
19623
571k
  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19624
60
    return V;
19625
570k
19626
570k
  return SDValue();
19627
570k
}
19628
19629
/// Simplify a SELECT whose condition is a SETCC by delegating to
/// SimplifySelectCC, then rebuilding a SETCC + SELECT pair if that produced a
/// SELECT_CC node (since our caller handed us a SELECT, not a SELECT_CC).
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      const SDNodeFlags Flags = N0.getNode()->getFlags();
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4), Flags);
      AddToWorklist(SETCC.getNode());
      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                                         SCC.getOperand(2), SCC.getOperand(3));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SCC;
  }
  return SDValue();
}
19659
19660
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19661
/// being selected between, see if we can simplify the select.  Callers of this
19662
/// should assume that TheSelect is deleted if this returns true.  As such, they
19663
/// should return the appropriate thing (e.g. the node) back to the top-level of
19664
/// the DAG combiner loop to avoid it being looked at.
19665
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19666
183k
                                    SDValue RHS) {
19667
183k
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19668
183k
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19669
183k
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19670
7.30k
    if (NaN->isNaN() && 
RHS.getOpcode() == ISD::FSQRT129
) {
19671
15
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19672
15
      SDValue Sqrt = RHS;
19673
15
      ISD::CondCode CC;
19674
15
      SDValue CmpLHS;
19675
15
      const ConstantFPSDNode *Zero = nullptr;
19676
15
19677
15
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19678
0
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19679
0
        CmpLHS = TheSelect->getOperand(0);
19680
0
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19681
15
      } else {
19682
15
        // SELECT or VSELECT
19683
15
        SDValue Cmp = TheSelect->getOperand(0);
19684
15
        if (Cmp.getOpcode() == ISD::SETCC) {
19685
15
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19686
15
          CmpLHS = Cmp.getOperand(0);
19687
15
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19688
15
        }
19689
15
      }
19690
15
      if (Zero && Zero->isZero() &&
19691
15
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19692
15
          
CC == ISD::SETULT6
||
CC == ISD::SETLT0
)) {
19693
15
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19694
15
        CombineTo(TheSelect, Sqrt);
19695
15
        return true;
19696
15
      }
19697
183k
    }
19698
7.30k
  }
19699
183k
  // Cannot simplify select with vector condition
19700
183k
  if (TheSelect->getOperand(0).getValueType().isVector()) 
return false48.0k
;
19701
135k
19702
135k
  // If this is a select from two identical things, try to pull the operation
19703
135k
  // through the select.
19704
135k
  if (LHS.getOpcode() != RHS.getOpcode() ||
19705
135k
      
!LHS.hasOneUse()60.0k
||
!RHS.hasOneUse()35.2k
)
19706
104k
    return false;
19707
30.4k
19708
30.4k
  // If this is a load and the token chain is identical, replace the select
19709
30.4k
  // of two loads with a load through a select of the address to load from.
19710
30.4k
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19711
30.4k
  // constants have been dropped into the constant pool.
19712
30.4k
  if (LHS.getOpcode() == ISD::LOAD) {
19713
494
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19714
494
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19715
494
19716
494
    // Token chains must be identical.
19717
494
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
19718
494
        // Do not let this transformation reduce the number of volatile loads.
19719
494
        
LLD->isVolatile()303
||
RLD->isVolatile()303
||
19720
494
        // FIXME: If either is a pre/post inc/dec load,
19721
494
        // we'd need to split out the address adjustment.
19722
494
        
LLD->isIndexed()303
||
RLD->isIndexed()303
||
19723
494
        // If this is an EXTLOAD, the VT's must match.
19724
494
        
LLD->getMemoryVT() != RLD->getMemoryVT()303
||
19725
494
        // If this is an EXTLOAD, the kind of extension must match.
19726
494
        
(301
LLD->getExtensionType() != RLD->getExtensionType()301
&&
19727
301
         // The only exception is if one of the extensions is anyext.
19728
301
         
LLD->getExtensionType() != ISD::EXTLOAD2
&&
19729
301
         
RLD->getExtensionType() != ISD::EXTLOAD2
) ||
19730
494
        // FIXME: this discards src value information.  This is
19731
494
        // over-conservative. It would be beneficial to be able to remember
19732
494
        // both potential memory locations.  Since we are discarding
19733
494
        // src value info, don't do the transformation if the memory
19734
494
        // locations are not in the default address space.
19735
494
        
LLD->getPointerInfo().getAddrSpace() != 0299
||
19736
494
        
RLD->getPointerInfo().getAddrSpace() != 0144
||
19737
494
        // We can't produce a CMOV of a TargetFrameIndex since we won't
19738
494
        // generate the address generation required.
19739
494
        
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex144
||
19740
494
        
RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex142
||
19741
494
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19742
142
                                      LLD->getBasePtr().getValueType()))
19743
352
      return false;
19744
142
19745
142
    // The loads must not depend on one another.
19746
142
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19747
0
      return false;
19748
142
19749
142
    // Check that the select condition doesn't reach either load.  If so,
19750
142
    // folding this will induce a cycle into the DAG.  If not, this is safe to
19751
142
    // xform, so create a select of the addresses.
19752
142
19753
142
    SmallPtrSet<const SDNode *, 32> Visited;
19754
142
    SmallVector<const SDNode *, 16> Worklist;
19755
142
19756
142
    // Always fail if LLD and RLD are not independent. TheSelect is a
19757
142
    // predecessor to all Nodes in question so we need not search past it.
19758
142
19759
142
    Visited.insert(TheSelect);
19760
142
    Worklist.push_back(LLD);
19761
142
    Worklist.push_back(RLD);
19762
142
19763
142
    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19764
142
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19765
0
      return false;
19766
142
19767
142
    SDValue Addr;
19768
142
    if (TheSelect->getOpcode() == ISD::SELECT) {
19769
142
      // We cannot do this optimization if any pair of {RLD, LLD} is a
19770
142
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19771
142
      // Loads, we only need to check if CondNode is a successor to one of the
19772
142
      // loads. We can further avoid this if there's no use of their chain
19773
142
      // value.
19774
142
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
19775
142
      Worklist.push_back(CondNode);
19776
142
19777
142
      if ((LLD->hasAnyUseOfValue(1) &&
19778
142
           
SDNode::hasPredecessorHelper(LLD, Visited, Worklist)32
) ||
19779
142
          
(140
RLD->hasAnyUseOfValue(1)140
&&
19780
140
           
SDNode::hasPredecessorHelper(RLD, Visited, Worklist)30
))
19781
2
        return false;
19782
140
19783
140
      Addr = DAG.getSelect(SDLoc(TheSelect),
19784
140
                           LLD->getBasePtr().getValueType(),
19785
140
                           TheSelect->getOperand(0), LLD->getBasePtr(),
19786
140
                           RLD->getBasePtr());
19787
140
    } else {  // Otherwise SELECT_CC
19788
0
      // We cannot do this optimization if any pair of {RLD, LLD} is a
19789
0
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19790
0
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19791
0
      // one of the loads. We can further avoid this if there's no use of their
19792
0
      // chain value.
19793
0
19794
0
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19795
0
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19796
0
      Worklist.push_back(CondLHS);
19797
0
      Worklist.push_back(CondRHS);
19798
0
19799
0
      if ((LLD->hasAnyUseOfValue(1) &&
19800
0
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19801
0
          (RLD->hasAnyUseOfValue(1) &&
19802
0
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19803
0
        return false;
19804
0
19805
0
      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19806
0
                         LLD->getBasePtr().getValueType(),
19807
0
                         TheSelect->getOperand(0),
19808
0
                         TheSelect->getOperand(1),
19809
0
                         LLD->getBasePtr(), RLD->getBasePtr(),
19810
0
                         TheSelect->getOperand(4));
19811
0
    }
19812
142
19813
142
    SDValue Load;
19814
140
    // It is safe to replace the two loads if they have different alignments,
19815
140
    // but the new load must be the minimum (most restrictive) alignment of the
19816
140
    // inputs.
19817
140
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19818
140
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19819
140
    if (!RLD->isInvariant())
19820
140
      MMOFlags &= ~MachineMemOperand::MOInvariant;
19821
140
    if (!RLD->isDereferenceable())
19822
108
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
19823
140
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19824
139
      // FIXME: Discards pointer and AA info.
19825
139
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19826
139
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19827
139
                         MMOFlags);
19828
139
    } else {
19829
1
      // FIXME: Discards pointer and AA info.
19830
1
      Load = DAG.getExtLoad(
19831
1
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19832
1
                                                  : 
LLD->getExtensionType()0
,
19833
1
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19834
1
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19835
1
    }
19836
140
19837
140
    // Users of the select now use the result of the load.
19838
140
    CombineTo(TheSelect, Load);
19839
140
19840
140
    // Users of the old loads now use the new load's chain.  We know the
19841
140
    // old-load value is dead now.
19842
140
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19843
140
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19844
140
    return true;
19845
29.9k
  }
19846
29.9k
19847
29.9k
  return false;
19848
29.9k
}
19849
19850
/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19851
/// bitwise 'and'.
19852
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19853
                                            SDValue N1, SDValue N2, SDValue N3,
19854
121k
                                            ISD::CondCode CC) {
19855
121k
  // If this is a select where the false operand is zero and the compare is a
19856
121k
  // check of the sign bit, see if we can perform the "gzip trick":
19857
121k
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19858
121k
  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19859
121k
  EVT XType = N0.getValueType();
19860
121k
  EVT AType = N2.getValueType();
19861
121k
  if (!isNullConstant(N3) || 
!XType.bitsGE(AType)37.7k
)
19862
94.7k
    return SDValue();
19863
26.8k
19864
26.8k
  // If the comparison is testing for a positive value, we have to invert
19865
26.8k
  // the sign bit mask, so only do that transform if the target has a bitwise
19866
26.8k
  // 'and not' instruction (the invert is free).
19867
26.8k
  if (CC == ISD::SETGT && 
TLI.hasAndNot(N2)1.84k
) {
19868
696
    // (X > -1) ? A : 0
19869
696
    // (X >  0) ? X : 0 <-- This is canonical signed max.
19870
696
    if (!(isAllOnesConstant(N1) || 
(674
isNullConstant(N1)674
&&
N0 == N2476
)))
19871
298
      return SDValue();
19872
26.1k
  } else if (CC == ISD::SETLT) {
19873
775
    // (X <  0) ? A : 0
19874
775
    // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
19875
775
    if (!(isNullConstant(N1) || 
(462
isOneConstant(N1)462
&&
N0 == N271
)))
19876
458
      return SDValue();
19877
25.3k
  } else {
19878
25.3k
    return SDValue();
19879
25.3k
  }
19880
715
19881
715
  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19882
715
  // constant.
19883
715
  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19884
715
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19885
715
  if (N2C && 
((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)271
) {
19886
152
    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19887
152
    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19888
152
    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19889
152
    AddToWorklist(Shift.getNode());
19890
152
19891
152
    if (XType.bitsGT(AType)) {
19892
65
      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19893
65
      AddToWorklist(Shift.getNode());
19894
65
    }
19895
152
19896
152
    if (CC == ISD::SETGT)
19897
7
      Shift = DAG.getNOT(DL, Shift, AType);
19898
152
19899
152
    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19900
152
  }
19901
563
19902
563
  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19903
563
  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19904
563
  AddToWorklist(Shift.getNode());
19905
563
19906
563
  if (XType.bitsGT(AType)) {
19907
65
    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19908
65
    AddToWorklist(Shift.getNode());
19909
65
  }
19910
563
19911
563
  if (CC == ISD::SETGT)
19912
391
    Shift = DAG.getNOT(DL, Shift, AType);
19913
563
19914
563
  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19915
563
}
19916
19917
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
19918
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19919
/// in it. This may be a win when the constant is not otherwise available
19920
/// because it replaces two constant pool loads with one.
19921
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19922
    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19923
123k
    ISD::CondCode CC) {
19924
123k
  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
19925
991
    return SDValue();
19926
122k
19927
122k
  // If we are before legalize types, we want the other legalization to happen
19928
122k
  // first (for example, to avoid messing with soft float).
19929
122k
  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19930
122k
  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19931
122k
  EVT VT = N2.getValueType();
19932
122k
  if (!TV || 
!FV4.36k
||
!TLI.isTypeLegal(VT)3.41k
)
19933
119k
    return SDValue();
19934
3.25k
19935
3.25k
  // If a constant can be materialized without loads, this does not make sense.
19936
3.25k
  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19937
3.25k
      
TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize)2.95k
||
19938
3.25k
      
TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize)2.13k
)
19939
1.29k
    return SDValue();
19940
1.95k
19941
1.95k
  // If both constants have multiple uses, then we won't need to do an extra
19942
1.95k
  // load. The values are likely around in registers for other users.
19943
1.95k
  if (!TV->hasOneUse() && 
!FV->hasOneUse()101
)
19944
11
    return SDValue();
19945
1.94k
19946
1.94k
  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19947
1.94k
                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19948
1.94k
  Type *FPTy = Elts[0]->getType();
19949
1.94k
  const DataLayout &TD = DAG.getDataLayout();
19950
1.94k
19951
1.94k
  // Create a ConstantArray of the two constants.
19952
1.94k
  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19953
1.94k
  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19954
1.94k
                                      TD.getPrefTypeAlignment(FPTy));
19955
1.94k
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19956
1.94k
19957
1.94k
  // Get offsets to the 0 and 1 elements of the array, so we can select between
19958
1.94k
  // them.
19959
1.94k
  SDValue Zero = DAG.getIntPtrConstant(0, DL);
19960
1.94k
  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19961
1.94k
  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19962
1.94k
  SDValue Cond =
19963
1.94k
      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19964
1.94k
  AddToWorklist(Cond.getNode());
19965
1.94k
  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19966
1.94k
  AddToWorklist(CstOffset.getNode());
19967
1.94k
  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19968
1.94k
  AddToWorklist(CPIdx.getNode());
19969
1.94k
  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19970
1.94k
                     MachinePointerInfo::getConstantPool(
19971
1.94k
                         DAG.getMachineFunction()), Alignment);
19972
1.94k
}
19973
19974
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19975
/// where 'cond' is the comparison specified by CC.
19976
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19977
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
19978
123k
                                      bool NotExtCompare) {
19979
123k
  // (x ? y : y) -> y.
19980
123k
  if (N2 == N3) 
return N20
;
19981
123k
19982
123k
  EVT CmpOpVT = N0.getValueType();
19983
123k
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
19984
123k
  EVT VT = N2.getValueType();
19985
123k
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19986
123k
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19987
123k
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19988
123k
19989
123k
  // Determine if the condition we're dealing with is constant.
19990
123k
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19991
8
    AddToWorklist(SCC.getNode());
19992
8
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
19993
8
      // fold select_cc true, x, y -> x
19994
8
      // fold select_cc false, x, y -> y
19995
8
      return !(SCCC->isNullValue()) ? 
N22
:
N36
;
19996
8
    }
19997
123k
  }
19998
123k
19999
123k
  if (SDValue V =
20000
1.94k
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20001
1.94k
    return V;
20002
121k
20003
121k
  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20004
715
    return V;
20005
120k
20006
120k
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20007
120k
  // where y is has a single bit set.
20008
120k
  // A plaintext description would be, we can turn the SELECT_CC into an AND
20009
120k
  // when the condition can be materialized as an all-ones register.  Any
20010
120k
  // single bit-test can be materialized as an all-ones register with
20011
120k
  // shift-left and shift-right-arith.
20012
120k
  if (CC == ISD::SETEQ && 
N0->getOpcode() == ISD::AND46.3k
&&
20013
120k
      
N0->getValueType(0) == VT11.0k
&&
isNullConstant(N1)1.62k
&&
isNullConstant(N2)1.30k
) {
20014
82
    SDValue AndLHS = N0->getOperand(0);
20015
82
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20016
82
    if (ConstAndRHS && 
ConstAndRHS->getAPIntValue().countPopulation() == 115
) {
20017
5
      // Shift the tested bit over the sign bit.
20018
5
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
20019
5
      SDValue ShlAmt =
20020
5
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20021
5
                        getShiftAmountTy(AndLHS.getValueType()));
20022
5
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20023
5
20024
5
      // Now arithmetic right shift it all the way over, so the result is either
20025
5
      // all-ones, or zero.
20026
5
      SDValue ShrAmt =
20027
5
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
20028
5
                        getShiftAmountTy(Shl.getValueType()));
20029
5
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20030
5
20031
5
      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20032
5
    }
20033
120k
  }
20034
120k
20035
120k
  // fold select C, 16, 0 -> shl C, 4
20036
120k
  bool Fold = N2C && 
isNullConstant(N3)48.8k
&&
N2C->getAPIntValue().isPowerOf2()33.8k
;
20037
120k
  bool Swap = N3C && 
isNullConstant(N2)57.6k
&&
N3C->getAPIntValue().isPowerOf2()821
;
20038
120k
20039
120k
  if ((Fold || 
Swap93.5k
) &&
20040
120k
      TLI.getBooleanContents(CmpOpVT) ==
20041
27.9k
          TargetLowering::ZeroOrOneBooleanContent &&
20042
120k
      
(25.9k
!LegalOperations25.9k
||
TLI.isOperationLegal(ISD::SETCC, CmpOpVT)1.07k
)) {
20043
25.8k
20044
25.8k
    if (Swap) {
20045
484
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
20046
484
      std::swap(N2C, N3C);
20047
484
    }
20048
25.8k
20049
25.8k
    // If the caller doesn't want us to simplify this into a zext of a compare,
20050
25.8k
    // don't do it.
20051
25.8k
    if (NotExtCompare && 
N2C->isOne()23.2k
)
20052
23.2k
      return SDValue();
20053
2.57k
20054
2.57k
    SDValue Temp, SCC;
20055
2.57k
    // zext (setcc n0, n1)
20056
2.57k
    if (LegalTypes) {
20057
389
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20058
389
      if (VT.bitsLT(SCC.getValueType()))
20059
0
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20060
389
      else
20061
389
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20062
2.18k
    } else {
20063
2.18k
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20064
2.18k
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20065
2.18k
    }
20066
2.57k
20067
2.57k
    AddToWorklist(SCC.getNode());
20068
2.57k
    AddToWorklist(Temp.getNode());
20069
2.57k
20070
2.57k
    if (N2C->isOne())
20071
18
      return Temp;
20072
2.55k
20073
2.55k
    // shl setcc result by log2 n2c
20074
2.55k
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20075
2.55k
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
20076
2.55k
                                       SDLoc(Temp),
20077
2.55k
                                       getShiftAmountTy(Temp.getValueType())));
20078
2.55k
  }
20079
95.0k
20080
95.0k
  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20081
95.0k
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20082
95.0k
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20083
95.0k
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20084
95.0k
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20085
95.0k
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20086
95.0k
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20087
95.0k
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20088
95.0k
  if (N1C && 
N1C->isNullValue()62.3k
&&
(32.7k
CC == ISD::SETEQ32.7k
||
CC == ISD::SETNE10.9k
)) {
20089
27.9k
    SDValue ValueOnZero = N2;
20090
27.9k
    SDValue Count = N3;
20091
27.9k
    // If the condition is NE instead of E, swap the operands.
20092
27.9k
    if (CC == ISD::SETNE)
20093
6.18k
      std::swap(ValueOnZero, Count);
20094
27.9k
    // Check if the value on zero is a constant equal to the bits in the type.
20095
27.9k
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20096
8.48k
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20097
168
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20098
168
        // legal, combine to just cttz.
20099
168
        if ((Count.getOpcode() == ISD::CTTZ ||
20100
168
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20101
168
            
N0 == Count.getOperand(0)24
&&
20102
168
            
(22
!LegalOperations22
||
TLI.isOperationLegal(ISD::CTTZ, VT)0
))
20103
22
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20104
146
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20105
146
        // legal, combine to just ctlz.
20106
146
        if ((Count.getOpcode() == ISD::CTLZ ||
20107
146
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20108
146
            
N0 == Count.getOperand(0)15
&&
20109
146
            
(15
!LegalOperations15
||
TLI.isOperationLegal(ISD::CTLZ, VT)0
))
20110
15
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20111
95.0k
      }
20112
8.48k
    }
20113
27.9k
  }
20114
95.0k
20115
95.0k
  return SDValue();
20116
95.0k
}
20117
20118
/// This is a stub for TargetLowering::SimplifySetCC.
20119
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20120
                                   ISD::CondCode Cond, const SDLoc &DL,
20121
1.22M
                                   bool foldBooleans) {
20122
1.22M
  TargetLowering::DAGCombinerInfo
20123
1.22M
    DagCombineInfo(DAG, Level, false, this);
20124
1.22M
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20125
1.22M
}
20126
20127
/// Given an ISD::SDIV node expressing a divide by constant, return
20128
/// a DAG expression to select that will generate the same value by multiplying
20129
/// by a magic number.
20130
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20131
1.91k
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20132
1.91k
  // when optimising for minimum size, we don't want to expand a div to a mul
20133
1.91k
  // and a shift.
20134
1.91k
  if (DAG.getMachineFunction().getFunction().hasMinSize())
20135
43
    return SDValue();
20136
1.86k
20137
1.86k
  SmallVector<SDNode *, 8> Built;
20138
1.86k
  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20139
1.69k
    for (SDNode *N : Built)
20140
6.08k
      AddToWorklist(N);
20141
1.69k
    return S;
20142
1.69k
  }
20143
176
20144
176
  return SDValue();
20145
176
}
20146
20147
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20148
/// DAG expression that will generate the same value by right shifting.
20149
946
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20150
946
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20151
946
  if (!C)
20152
112
    return SDValue();
20153
834
20154
834
  // Avoid division by zero.
20155
834
  if (C->isNullValue())
20156
0
    return SDValue();
20157
834
20158
834
  SmallVector<SDNode *, 8> Built;
20159
834
  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20160
455
    for (SDNode *N : Built)
20161
1.12k
      AddToWorklist(N);
20162
455
    return S;
20163
455
  }
20164
379
20165
379
  return SDValue();
20166
379
}
20167
20168
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20169
/// expression that will generate the same value by multiplying by a magic
20170
/// number.
20171
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20172
1.83k
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20173
1.83k
  // when optimising for minimum size, we don't want to expand a div to a mul
20174
1.83k
  // and a shift.
20175
1.83k
  if (DAG.getMachineFunction().getFunction().hasMinSize())
20176
32
    return SDValue();
20177
1.80k
20178
1.80k
  SmallVector<SDNode *, 8> Built;
20179
1.80k
  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20180
1.59k
    for (SDNode *N : Built)
20181
5.61k
      AddToWorklist(N);
20182
1.59k
    return S;
20183
1.59k
  }
20184
213
20185
213
  return SDValue();
20186
213
}
20187
20188
/// Determines the LogBase2 value for a non-null input value using the
20189
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20190
6.15k
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20191
6.15k
  EVT VT = V.getValueType();
20192
6.15k
  unsigned EltBits = VT.getScalarSizeInBits();
20193
6.15k
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20194
6.15k
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20195
6.15k
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20196
6.15k
  return LogBase2;
20197
6.15k
}
20198
20199
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20200
/// For the reciprocal, we need to find the zero of the function:
20201
///   F(X) = A X - 1 [which has a zero at X = 1/A]
20202
///     =>
20203
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20204
///     does not require additional intermediate precision]
20205
1.25k
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
20206
1.25k
  if (Level >= AfterLegalizeDAG)
20207
209
    return SDValue();
20208
1.05k
20209
1.05k
  // TODO: Handle half and/or extended types?
20210
1.05k
  EVT VT = Op.getValueType();
20211
1.05k
  if (VT.getScalarType() != MVT::f32 && 
VT.getScalarType() != MVT::f64107
)
20212
23
    return SDValue();
20213
1.02k
20214
1.02k
  // If estimates are explicitly disabled for this function, we're done.
20215
1.02k
  MachineFunction &MF = DAG.getMachineFunction();
20216
1.02k
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20217
1.02k
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
20218
80
    return SDValue();
20219
947
20220
947
  // Estimates may be explicitly enabled for this type with a custom number of
20221
947
  // refinement steps.
20222
947
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
20223
947
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20224
660
    AddToWorklist(Est.getNode());
20225
660
20226
660
    if (Iterations) {
20227
279
      SDLoc DL(Op);
20228
279
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20229
279
20230
279
      // Newton iterations: Est = Est + Est (1 - Arg * Est)
20231
669
      for (int i = 0; i < Iterations; 
++i390
) {
20232
390
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
20233
390
        AddToWorklist(NewEst.getNode());
20234
390
20235
390
        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
20236
390
        AddToWorklist(NewEst.getNode());
20237
390
20238
390
        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20239
390
        AddToWorklist(NewEst.getNode());
20240
390
20241
390
        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
20242
390
        AddToWorklist(Est.getNode());
20243
390
      }
20244
279
    }
20245
660
    return Est;
20246
660
  }
20247
287
20248
287
  return SDValue();
20249
287
}
20250
20251
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
///
/// \param Arg        The operand A whose (reciprocal) square root is sought.
/// \param Est        The initial hardware estimate of 1/sqrt(A).
/// \param Iterations Number of Newton refinement steps to emit.
/// \param Reciprocal True to compute rsqrt(A); false to compute sqrt(A)
///                   (done by a final multiply of the rsqrt estimate by A).
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    // Est^2
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    // (A/2) * Est^2
    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    // 1.5 - (A/2) * Est^2
    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    // Est = Est * (1.5 - (A/2) * Est^2)
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg:
  // sqrt(A) = A * rsqrt(A).
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
20295
20296
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
///
/// Unlike buildSqrtNROneConst, this variant uses two FP constants (-0.5 and
/// -3.0) but needs no precomputed A/2. When a plain sqrt is requested, the
/// final iteration folds the multiply-by-A into the left factor.
///
/// \param Arg        The operand A whose (reciprocal) square root is sought.
/// \param Est        The initial hardware estimate of 1/sqrt(A).
/// \param Iterations Number of Newton refinement steps; must be non-zero
///                   (the sqrt-by-A fold only happens inside the loop).
/// \param Reciprocal True for rsqrt(A); false for sqrt(A).
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
20344
20345
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
///
/// Returns an empty SDValue when no estimate sequence is built (wrong phase,
/// unsupported type, estimates disabled, or no target estimate available).
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Only performed before the post-legalization combine phase.
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target chooses between the one-constant and two-constant
  // Newton-Raphson refinement forms via UseOneConstNR.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // With IEEE denormal handling, denormal inputs must also be zeroed:
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // Otherwise only exact zero needs fixing up:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
20414
20415
130
/// Build an estimate sequence for 1/sqrt(Op) (reciprocal square root).
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  // Reciprocal = true selects the rsqrt form of the shared implementation.
  return buildSqrtEstimateImpl(Op, Flags, true);
}
20418
20419
296
/// Build an estimate sequence for sqrt(Op).
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  // Reciprocal = false selects the sqrt form of the shared implementation.
  return buildSqrtEstimateImpl(Op, Flags, false);
}
20422
20423
/// Return true if there is any possibility that the two addresses overlap.
///
/// Works through a sequence of increasingly precise (and increasingly
/// expensive) disambiguation tests, returning as soon as one is decisive:
/// trivially-same address, volatility, invariant-vs-store, base+offset
/// analysis, alignment-based disjointness, and finally IR-level alias
/// analysis. Any case that cannot be decided conservatively returns true.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

  // Summary of one memory access: volatility, address (base + byte offset),
  // access size if known, and the MachineMemOperand if the node has one.
  struct MemUseCharacteristics {
    bool IsVolatile;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      // Fold a pre-inc/pre-dec constant offset into the base offset; other
      // addressing modes contribute no extra offset here.
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    // Lifetime markers carry an address operand and possibly an offset/size,
    // but never a MachineMemOperand.
    if (const auto *LN = cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // An invariant load can never be clobbered by a store, so such a pair
  // trivially does not alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
  // either are not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    // Same-size accesses at distinct offsets within an alignment window that
    // exceeds the access size cannot overlap.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }

  // Decide whether to consult IR alias analysis: the command-line override
  // wins, otherwise defer to the subtarget's preference.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
20542
20543
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
///
/// \param N             The memory node whose chain is being improved.
/// \param OriginalChain The chain to start the upward walk from.
/// \param Aliases       [out] Chain values that must remain predecessors of N.
///                      If the depth limit is hit, this is reset to just
///                      OriginalChain (a conservative result).
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node. A non-volatile load can be skipped past
  // other loads below.
  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step: on success, C is rewritten to
  // the next chain to inspect (or to a null SDValue for EntryToken, which
  // terminates that path); on failure, C must be kept as an alias.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
      // Two non-volatile loads never conflict; otherwise fall back to the
      // full alias check.
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      // Unknown chain producers must conservatively be kept as aliases.
      return false;
    }
  };

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: conservatively keep the original chain as the only alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else
    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}
20647
20648
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20649
/// (aliasing node.)
20650
6.18M
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20651
6.18M
  if (OptLevel == CodeGenOpt::None)
20652
15.5k
    return OldChain;
20653
6.16M
20654
6.16M
  // Ops for replacing token factor.
20655
6.16M
  SmallVector<SDValue, 8> Aliases;
20656
6.16M
20657
6.16M
  // Accumulate all the aliases to this node.
20658
6.16M
  GatherAllAliases(N, OldChain, Aliases);
20659
6.16M
20660
6.16M
  // If no operands then chain to entry token.
20661
6.16M
  if (Aliases.size() == 0)
20662
3.38M
    return DAG.getEntryNode();
20663
2.78M
20664
2.78M
  // If a single operand then chain to it.  We don't need to revisit it.
20665
2.78M
  if (Aliases.size() == 1)
20666
2.30M
    return Aliases[0];
20667
478k
20668
478k
  // Construct a custom tailored token factor.
20669
478k
  return DAG.getTokenFactor(SDLoc(N), Aliases);
20670
478k
}
20671
20672
namespace {

// Empty tag type used as the (meaningless) mapped value for the IntervalMap
// in parallelizeChainedStores below, which only cares about which key ranges
// are covered, not what they map to.
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;

// All UnitT values compare equal, so adjacent intervals can coalesce.
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // namespace
20678
20679
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.

/// Walk the chain below \p St collecting non-overlapping, non-volatile stores
/// to the same base pointer, rechain each of them (and St) independently of
/// one another, and replace St with a token factor over the improved stores.
/// Returns true if St was replaced.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately previous address
  // space and thus merged with the previous interval at insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval (byte size, rounded up from bits).
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Walk up the chain while each predecessor is a single-use, non-volatile,
  // non-indexed store to the same base that does not overlap what we have
  // already collected.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    if (Chain->isVolatile() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    // Rewrite the store in place with its improved chain; UpdateNodeOperands
    // may CSE to an existing node, so keep the returned pointer.
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Revisit the old chain head and everything hanging off the new token
  // factor so follow-on combines (e.g. store merging) see the new shape.
  AddToWorklist(STChain);
  // Add TF operands worklist in reverse order.
  for (auto I = TF->getNumOperands(); I;)
    AddToWorklist(TF->getOperand(--I).getNode());
  AddToWorklist(TF.getNode());
  return true;
}
20798
20799
3.22M
/// Try to improve the chain dependencies of the store \p St, either by
/// parallelizing a whole run of chained disjoint stores or by finding a
/// better chain for St alone. Returns true if anything changed.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // Resolve the store's address; bail out when there is no usable base node
  // or the base pointer is undef.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  SDValue Base = BasePtr.getBase();
  if (!Base.getNode() || Base.isUndef())
    return false;

  // First try to directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Otherwise just try to improve this single store's chain.
  SDValue Improved = FindBetterChain(St, St->getChain());
  if (Improved == St->getChain())
    return false;
  replaceStoreChain(St, Improved);
  return true;
}
20825
20826
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  // Construct a combiner over this DAG and run it at the requested phase
  // (before/after legalization), using AA for memory disambiguation.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}